#Load libraries needed. 
library(easypackages)
libraries("rgdal", "gdalUtils", "rgeos", "raster", "exactextractr", "maptools", "tigris", "ggplot2", "cowplot", "dplyr", "sp", "sf", "rgdal", "stringr", "RColorBrewer", "ggcorrplot", "pander", "forcats", "ggsn", "mapproj", "rlist", "lubridate", "ggpubr")

Merging NO2 data with COVID data for analysis.

Preparing shapefiles.

#Reading in world countries shapefile.
data(wrld_simpl)
wrld_simpl@data[["NAME"]]
##   [1] Antigua and Barbuda                      
##   [2] Algeria                                  
##   [3] Azerbaijan                               
##   [4] Albania                                  
##   [5] Armenia                                  
##   [6] Angola                                   
##   [7] American Samoa                           
##   [8] Argentina                                
##   [9] Australia                                
##  [10] Bahrain                                  
##  [11] Barbados                                 
##  [12] Bermuda                                  
##  [13] Bahamas                                  
##  [14] Bangladesh                               
##  [15] Belize                                   
##  [16] Bosnia and Herzegovina                   
##  [17] Bolivia                                  
##  [18] Burma                                    
##  [19] Benin                                    
##  [20] Solomon Islands                          
##  [21] Brazil                                   
##  [22] Bulgaria                                 
##  [23] Brunei Darussalam                        
##  [24] Canada                                   
##  [25] Cambodia                                 
##  [26] Sri Lanka                                
##  [27] Congo                                    
##  [28] Democratic Republic of the Congo         
##  [29] Burundi                                  
##  [30] China                                    
##  [31] Afghanistan                              
##  [32] Bhutan                                   
##  [33] Chile                                    
##  [34] Cayman Islands                           
##  [35] Cameroon                                 
##  [36] Chad                                     
##  [37] Comoros                                  
##  [38] Colombia                                 
##  [39] Costa Rica                               
##  [40] Central African Republic                 
##  [41] Cuba                                     
##  [42] Cape Verde                               
##  [43] Cook Islands                             
##  [44] Cyprus                                   
##  [45] Denmark                                  
##  [46] Djibouti                                 
##  [47] Dominica                                 
##  [48] Dominican Republic                       
##  [49] Ecuador                                  
##  [50] Egypt                                    
##  [51] Ireland                                  
##  [52] Equatorial Guinea                        
##  [53] Estonia                                  
##  [54] Eritrea                                  
##  [55] El Salvador                              
##  [56] Ethiopia                                 
##  [57] Austria                                  
##  [58] Czech Republic                           
##  [59] French Guiana                            
##  [60] Finland                                  
##  [61] Fiji                                     
##  [62] Falkland Islands (Malvinas)              
##  [63] Micronesia, Federated States of          
##  [64] French Polynesia                         
##  [65] France                                   
##  [66] Gambia                                   
##  [67] Gabon                                    
##  [68] Georgia                                  
##  [69] Ghana                                    
##  [70] Grenada                                  
##  [71] Greenland                                
##  [72] Germany                                  
##  [73] Guam                                     
##  [74] Greece                                   
##  [75] Guatemala                                
##  [76] Guinea                                   
##  [77] Guyana                                   
##  [78] Haiti                                    
##  [79] Honduras                                 
##  [80] Croatia                                  
##  [81] Hungary                                  
##  [82] Iceland                                  
##  [83] India                                    
##  [84] Iran (Islamic Republic of)               
##  [85] Israel                                   
##  [86] Italy                                    
##  [87] Cote d'Ivoire                            
##  [88] Iraq                                     
##  [89] Japan                                    
##  [90] Jamaica                                  
##  [91] Jordan                                   
##  [92] Kenya                                    
##  [93] Kyrgyzstan                               
##  [94] Korea, Democratic People's Republic of   
##  [95] Kiribati                                 
##  [96] Korea, Republic of                       
##  [97] Kuwait                                   
##  [98] Kazakhstan                               
##  [99] Lao People's Democratic Republic         
## [100] Lebanon                                  
## [101] Latvia                                   
## [102] Belarus                                  
## [103] Lithuania                                
## [104] Liberia                                  
## [105] Slovakia                                 
## [106] Liechtenstein                            
## [107] Libyan Arab Jamahiriya                   
## [108] Madagascar                               
## [109] Martinique                               
## [110] Mongolia                                 
## [111] Montserrat                               
## [112] The former Yugoslav Republic of Macedonia
## [113] Mali                                     
## [114] Morocco                                  
## [115] Mauritius                                
## [116] Mauritania                               
## [117] Malta                                    
## [118] Oman                                     
## [119] Maldives                                 
## [120] Mexico                                   
## [121] Malaysia                                 
## [122] Mozambique                               
## [123] Malawi                                   
## [124] New Caledonia                            
## [125] Niue                                     
## [126] Niger                                    
## [127] Aruba                                    
## [128] Anguilla                                 
## [129] Belgium                                  
## [130] Hong Kong                                
## [131] Northern Mariana Islands                 
## [132] Faroe Islands                            
## [133] Andorra                                  
## [134] Gibraltar                                
## [135] Isle of Man                              
## [136] Luxembourg                               
## [137] Macau                                    
## [138] Monaco                                   
## [139] Palestine                                
## [140] Montenegro                               
## [141] Mayotte                                  
## [142] Aaland Islands                           
## [143] Norfolk Island                           
## [144] Cocos (Keeling) Islands                  
## [145] Antarctica                               
## [146] Bouvet Island                            
## [147] French Southern and Antarctic Lands      
## [148] Heard Island and McDonald Islands        
## [149] British Indian Ocean Territory           
## [150] Christmas Island                         
## [151] United States Minor Outlying Islands     
## [152] Vanuatu                                  
## [153] Nigeria                                  
## [154] Netherlands                              
## [155] Norway                                   
## [156] Nepal                                    
## [157] Nauru                                    
## [158] Suriname                                 
## [159] Nicaragua                                
## [160] New Zealand                              
## [161] Paraguay                                 
## [162] Peru                                     
## [163] Pakistan                                 
## [164] Poland                                   
## [165] Panama                                   
## [166] Portugal                                 
## [167] Papua New Guinea                         
## [168] Guinea-Bissau                            
## [169] Qatar                                    
## [170] Reunion                                  
## [171] Romania                                  
## [172] Republic of Moldova                      
## [173] Philippines                              
## [174] Puerto Rico                              
## [175] Russia                                   
## [176] Rwanda                                   
## [177] Saudi Arabia                             
## [178] Saint Kitts and Nevis                    
## [179] Seychelles                               
## [180] South Africa                             
## [181] Lesotho                                  
## [182] Botswana                                 
## [183] Senegal                                  
## [184] Slovenia                                 
## [185] Sierra Leone                             
## [186] Singapore                                
## [187] Somalia                                  
## [188] Spain                                    
## [189] Saint Lucia                              
## [190] Sudan                                    
## [191] Sweden                                   
## [192] Syrian Arab Republic                     
## [193] Switzerland                              
## [194] Trinidad and Tobago                      
## [195] Thailand                                 
## [196] Tajikistan                               
## [197] Tokelau                                  
## [198] Tonga                                    
## [199] Togo                                     
## [200] Sao Tome and Principe                    
## [201] Tunisia                                  
## [202] Turkey                                   
## [203] Tuvalu                                   
## [204] Turkmenistan                             
## [205] United Republic of Tanzania              
## [206] Uganda                                   
## [207] United Kingdom                           
## [208] Ukraine                                  
## [209] United States                            
## [210] Burkina Faso                             
## [211] Uruguay                                  
## [212] Uzbekistan                               
## [213] Saint Vincent and the Grenadines         
## [214] Venezuela                                
## [215] British Virgin Islands                   
## [216] Viet Nam                                 
## [217] United States Virgin Islands             
## [218] Namibia                                  
## [219] Wallis and Futuna Islands                
## [220] Samoa                                    
## [221] Swaziland                                
## [222] Yemen                                    
## [223] Zambia                                   
## [224] Zimbabwe                                 
## [225] Indonesia                                
## [226] Guadeloupe                               
## [227] Netherlands Antilles                     
## [228] United Arab Emirates                     
## [229] Timor-Leste                              
## [230] Pitcairn Islands                         
## [231] Palau                                    
## [232] Marshall Islands                         
## [233] Saint Pierre and Miquelon                
## [234] Saint Helena                             
## [235] San Marino                               
## [236] Turks and Caicos Islands                 
## [237] Western Sahara                           
## [238] Serbia                                   
## [239] Holy See (Vatican City)                  
## [240] Svalbard                                 
## [241] Saint Martin                             
## [242] Saint Barthelemy                         
## [243] Guernsey                                 
## [244] Jersey                                   
## [245] South Georgia South Sandwich Islands     
## [246] Taiwan                                   
## 246 Levels: Aaland Islands Afghanistan Albania Algeria ... Zimbabwe
plot(wrld_simpl) 

#Reading in urban areas shapefile. 
world.sf <- st_read("urban_shapefile/ne_10m_urban_areas_landscan.shp", crs = 4979)  # Read shapefile as an sf object
## Reading layer `ne_10m_urban_areas_landscan' from data source `/Users/alanaschreibman/COVID-19_NO2/urban_shapefile/ne_10m_urban_areas_landscan.shp' using driver `ESRI Shapefile'
## Simple feature collection with 6018 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -175.2333 ymin: -54.85 xmax: 178.5333 ymax: 77.49167
## Geodetic CRS:  WGS 84
us.urban.sf <- st_read("us_urban_shapefile/tl_2017_us_uac10.shp", crs = 4979)  # Read shapefile as an sf object
## Reading layer `tl_2017_us_uac10' from data source `/Users/alanaschreibman/COVID-19_NO2/us_urban_shapefile/tl_2017_us_uac10.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3601 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -170.7892 ymin: -14.37378 xmax: 145.7909 ymax: 71.30684
## Geodetic CRS:  WGS 84

Preparing COVID data.

#Filtering COVID data (from COVID exploratory analysis.)
owid <- read.csv(url("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"), na.strings = c("", "NA"), colClasses = c("date" = "Date"), header = TRUE)
owid.filtered <- owid %>%
                  rename(iso.code = iso_code, totcases = total_cases, totdeaths = total_deaths, totcases.mil = total_cases_per_million, totdeaths.mil = total_deaths_per_million, pop.density = population_density, med.age = median_age, older70 = aged_70_older, gdp.cap = gdp_per_capita, card.death = cardiovasc_death_rate, diab.prev = diabetes_prevalence, fem.smokers = female_smokers, male.smokers = male_smokers) %>%
                  select(c(iso.code:totcases, totdeaths, totcases.mil, totdeaths.mil, population, pop.density, med.age, older70, gdp.cap, card.death, diab.prev, fem.smokers, male.smokers)) %>% #Renamed variables and filtered to keep only those of interest. 
                  filter(date == "2021-06-28") #Filtered out all but most recent date (most recent date is a running sum of total cases).
table(owid.filtered$continent, useNA = "ifany") #Data includes both continent and country-specific outcomes. Continent outcomes can be distinguished by "NA" in the continent column; the continent name is in the location column. 
## 
##        Africa          Asia        Europe North America       Oceania 
##            54            48            51            33            16 
## South America          <NA> 
##            12             9
cfr.df <- data.frame("case.fatality.rate"= owid.filtered$totdeaths / owid.filtered$totcases, "location" = owid.filtered$location)
owid.filtered <- merge(owid.filtered, cfr.df, by = "location")
  rm(cfr.df)
owid.countries <- owid.filtered %>%
                    filter(!is.na(continent)) #Removed all rows with NA for continent, leaving behind only countries. 

#Filtering to get weekly averages for new cases and new deaths
dates.covid.weekly <- seq(as.Date("2020-01-20"), as.Date("2021-07-04"), by=7) 

owid.time.series.fxn <- function(t) {
  df <- owid %>%
  select(location, date, newcases = new_cases, newdeaths = new_deaths) %>%
  filter(location=="World") %>%
  filter(date %in% (as.Date(t):(as.Date(t) + 6)))
  df
}

owid.time.series.list <- purrr::map(dates.covid.weekly, owid.time.series.fxn) #Creates list of data frames, each corresponding to a particular week during lockdown. 

covid.mean.fxn <- function(x, column, lab) {
  df <- data.frame(owid.time.series.list[[x]])
  mean <- mean(df[[column]], na.rm = T)
  Date <- as.Date(dates.covid.weekly[[x]])
  Week <- paste0(as.character(as.Date(dates.covid.weekly[[x]])), " to ", as.character(as.Date(dates.covid.weekly[[x]]) + 6)) 
  col.lab <- as.character(paste0("Mean_Weekly_", lab))
  df <- data.frame(Date, Week, mean) 
  names(df)[3] <- as.character(paste0("Mean_Weekly_", lab))
  df
}

x <- 1:76
mean.newcases.df <- purrr::pmap_dfr(list(x, "newcases", "Cases"), covid.mean.fxn)
mean.newdeaths.df <- purrr::pmap_dfr(list(x, "newdeaths", "Deaths"), covid.mean.fxn)

Creating mosaic of half-world raster files and reading in world satellite data.

setwd("./World_Rasters")
left <- raster('Left_Half_World_NO2.tif') %>%
  flip(direction='y') 
right <- raster('Right_Half_World_NO2.tif') %>%
  flip(direction='y') 
x <- list(left, right)
names(x)[1:2] <- c('x', 'y')
x$fun <- mean
x$na.rm <- TRUE
world.NO2.r <- do.call(mosaic, x)
rm(left, right, x)

#Converting to data frame for plotting. 
r.crop <- crop(world.NO2.r, extent(wrld_simpl))
r.crop <- mask(r.crop, wrld_simpl)
r.pts <- rasterToPoints(r.crop, spatial = TRUE)
world.NO2.df <- data.frame(r.pts)
rm(r.pts, r.crop)

myPalette <- colorRampPalette(brewer.pal(9, "YlOrRd")) 

my_theme <- function() {
  theme_minimal() +                                  # shorthand for white background color
  theme(axis.line = element_blank(),                 # further customization of theme components
        axis.text = element_blank(),                 # remove x and y axis text and labels
        axis.title = element_blank(),
        panel.grid = element_line(color = "white"),  # make grid lines invisible
        legend.key.size = unit(0.5, "cm"),           # increase size of legend
        legend.text = element_text(size = 7),       # increase legend text size
        legend.title = element_text(size = 7))      # increase legend title size
}

ggplot() +
 geom_raster(data = world.NO2.df, aes(x = x, y = y, fill = layer)) + 
 geom_polygon(data = wrld_simpl, aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
  my_theme() + 
  ggtitle("World NO2 Concentrations 07-10-2018 to 07-10-2019") +
  #scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
  scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100))  +
    north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
    scalebar(x.min = -180, y.min = -90, x.max = 180, y.max = 90, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 2, model = 'WGS84') +
  coord_equal()

#R base plots.
plot(world.NO2.r)
plot(wrld_simpl, add = TRUE)

Preparing data for world analysis.

wrld_simpl@data$NAME <- as.character(wrld_simpl@data$NAME)

#Changing shapefile country names to match COVID dataset:
wrld_simpl@data$NAME[wrld_simpl$NAME=="Brunei Darussalam"] <- "Brunei"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Czech Republic"] <- "Czechia"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Democratic Republic of the Congo"] <- "Democratic Republic of Congo"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Faroe Islands"] <- "Faeroe Islands"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Iran (Islamic Republic of)"] <- "Iran"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Lao People's Democratic Republic"] <- "Laos"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Libyan Arab Jamahiriya"] <- "Libya"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Republic of Moldova"] <- "Moldova"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Burma"] <- "Myanmar"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Korea, Republic of"] <- "South Korea"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Korea, Democratic People's Republic of"] <- "North Korea"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Syrian Arab Republic"] <- "Syria"
wrld_simpl@data$NAME[wrld_simpl$NAME=="The former Yugoslav Republic of Macedonia"] <- "North Macedonia"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Macau"] <- "Macao"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Micronesia, Federated States of"] <- "Micronesia (country)"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Timor-Leste"] <- "Timor"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Holy See (Vatican City)"] <- "Vatican"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Viet Nam"] <- "Vietnam"
wrld_simpl@data$NAME[wrld_simpl$NAME=="Wallis and Futuna Islands"] <- "Wallis and Futuna"

#Missing from shapefiles but in OWID data: Kosovo, Tanzania, Eswatini
owid.countries$location[!(owid.countries$location %in% wrld_simpl@data$NAME)]
## [1] "Curacao"                   "Eswatini"                 
## [3] "Kosovo"                    "Sint Maarten (Dutch part)"
## [5] "South Sudan"               "Tanzania"
#Missing from OWID data but in shapefiles: 
wrld_simpl@data$NAME[!(wrld_simpl@data$NAME%in% owid.countries$location)]
##  [1] "American Samoa"                      
##  [2] "French Guiana"                       
##  [3] "Falkland Islands (Malvinas)"         
##  [4] "Guam"                                
##  [5] "North Korea"                         
##  [6] "Martinique"                          
##  [7] "Northern Mariana Islands"            
##  [8] "Mayotte"                             
##  [9] "Aaland Islands"                      
## [10] "Norfolk Island"                      
## [11] "Cocos (Keeling) Islands"             
## [12] "Antarctica"                          
## [13] "Bouvet Island"                       
## [14] "French Southern and Antarctic Lands" 
## [15] "Heard Island and McDonald Islands"   
## [16] "British Indian Ocean Territory"      
## [17] "Christmas Island"                    
## [18] "United States Minor Outlying Islands"
## [19] "Reunion"                             
## [20] "Puerto Rico"                         
## [21] "Tokelau"                             
## [22] "Tonga"                               
## [23] "Tuvalu"                              
## [24] "Turkmenistan"                        
## [25] "United Republic of Tanzania"         
## [26] "United States Virgin Islands"        
## [27] "Swaziland"                           
## [28] "Guadeloupe"                          
## [29] "Netherlands Antilles"                
## [30] "Pitcairn Islands"                    
## [31] "Palau"                               
## [32] "Saint Pierre and Miquelon"           
## [33] "Saint Helena"                        
## [34] "Western Sahara"                      
## [35] "Svalbard"                            
## [36] "Saint Martin"                        
## [37] "Saint Barthelemy"                    
## [38] "South Georgia South Sandwich Islands"
#Function to extract mean NO2 values by country shapefile. 
extraction.fxn <- function(x) {
  r.vals <- raster::extract(world.NO2.r, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
  r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
  final.df <- data.frame(wrld_simpl@data[wrld_simpl$NAME==as.character(x), ], r.mean)  
  rm(r.mean, r.vals)
  names(final.df)[5] <- "location" 
  names(final.df)[6] <- "Area" 
  names(final.df)[10] <- "Longitude" 
  names(final.df)[11] <- "Latitude" 
  names(final.df)[12] <- "NO2_Concentration"
  final.df <- final.df %>% dplyr::select(location, Area, Longitude, Latitude, NO2_Concentration)
}

#Extracting mean NO2 values and adding to coordinates data to make final data frame.
country.shp.list <- wrld_simpl@data$NAME
x <- country.shp.list
total.df <- purrr::map_dfr(x, extraction.fxn)

#Read in IHME regions data. 
IHME.regions.df <- read.csv(file = 'IHME_regions.csv', header = TRUE) 
IHME.regions.df <- IHME.regions.df %>% rename(location = Countries) %>% dplyr::select("Super.region", "Regions", "location")

#Merged geospatial data set with COVID data (variables come from COVID exploratory analysis RMD file). 
covid.NO2.df <- right_join(IHME.regions.df, total.df, by = "location")
covid.NO2.df <- full_join(covid.NO2.df, owid.countries, by = "location")
covid.NO2.df$Regions <- as.factor(covid.NO2.df$Regions)
covid.NO2.df$Super.region <- as.factor(covid.NO2.df$Super.region)

Weighting by population by country for data frame.

#isolate country using extract and crop for both pop and NO2
#calculate product of population and pollution for each cell using overlay function 
#sum all of these products 
#divide this sum by tot pop for country from df

pop.den.r <- raster("gpw_v4_population_density/pop.density.tif") #Read in GPW v4 population raster data.  

pop.weighting.fxn <- function(x) {
  pop.cropped <- crop(pop.den.r, extent(wrld_simpl[wrld_simpl$NAME==as.character(x), ]))
  pop.cropped <- mask(pop.cropped, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
  NO2.cropped <- crop(world.NO2.r, extent(wrld_simpl[wrld_simpl$NAME==as.character(x), ]))
  NO2.cropped <- mask(NO2.cropped, wrld_simpl[wrld_simpl$NAME==as.character(x), ])
  overlay.cropped <- overlay(pop.cropped, NO2.cropped, fun=function(x,y){(x*y)} )
  sum <- cellStats(overlay.cropped, stat='sum')
  mod.covid.df <- filter(covid.NO2.df, location == as.character(x))
  weighted.NO2 <- sum / mod.covid.df$population %>% data.frame()
  df <- wrld_simpl@data[wrld_simpl$NAME==as.character(x), ]
  weighted.NO2.df <- merge(df, weighted.NO2)
  names(weighted.NO2.df)[5] <- "location" 
  names(weighted.NO2.df)[12] <- "pop.weighted.NO2"
  weighted.NO2.df <- weighted.NO2.df %>% dplyr::select(location, pop.weighted.NO2)
} #Function to weight NO2 by population for each country. 
x <- country.shp.list[country.shp.list!="Antarctica"]

weighted.NO2.df <- purrr::map_dfr(x, pop.weighting.fxn)
weighted.covid.NO2.df <- full_join(covid.NO2.df, weighted.NO2.df, by = "location") #Join all data frames.
write.csv(weighted.covid.NO2.df, file = "weighted.covid.NO2.df.csv")
rm(weighted.covid.NO2.df)

Plotting weighted data.

weighted.covid.NO2.df <- read.csv(file = 'weighted.covid.NO2.df.csv')

wrld.df <- as.data.frame(wrld_simpl)
wrld.fort <- fortify(wrld_simpl, region = "NAME")
names(wrld.fort)[6] <- "location" #Getting world shapefile into data frame for ggplot. 

covid.NO2.plot.df <- full_join(wrld.fort, weighted.covid.NO2.df, by ="location")
covid.NO2.plot.df <- arrange(covid.NO2.plot.df, order) #Joined full weighted COVID data set with shapefile data set. 

map <- ggplot(data = covid.NO2.plot.df, aes(x = long, y = lat, group = group))

map + 
  geom_polygon(aes(fill = NO2_Concentration), color = 'black', size = 0.1) +
  my_theme() + 
  ggtitle("World Raw NO2 Concentrations 07-10-2018 to 07-10-2019") +
  scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100))  +
    north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
    scalebar(covid.NO2.plot.df, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 4, model = 'WGS84') +
  coord_equal()

  #Plot of raw NO2 concentrations. 

map + 
  geom_polygon(aes(fill = pop.weighted.NO2), color = 'black', size = 0.1) +
  my_theme() + 
  ggtitle("World Weighted NO2 Concentrations 07-10-2018 to 07-10-2019") +
  scale_fill_gradientn(name = "Weighted NO2 \nConcentration", colours = myPalette(100))  +
    north(x.min = -180, y.min = -90, x.max = 180, y.max = 90, anchor = c(x=-140, y=-35), symbol=12) +
    scalebar(covid.NO2.plot.df, dist = 1000, dist_unit = 'km', transform = TRUE, anchor = c(x=-140, y=-60), st.size = 4, model = 'WGS84') +
  coord_equal() #Plot of population weighted NO2. 

Grouping data by median.

high.covid <- filter(weighted.covid.NO2.df, totcases.mil >= median(weighted.covid.NO2.df$totcases.mil, na.rm = T))
low.covid <- filter(weighted.covid.NO2.df, totcases.mil < median(weighted.covid.NO2.df$totcases.mil, na.rm = T))
high.NO2 <- filter(weighted.covid.NO2.df, pop.weighted.NO2 >= median(weighted.covid.NO2.df$pop.weighted.NO2, na.rm = T))
low.NO2 <- filter(weighted.covid.NO2.df, pop.weighted.NO2 < median(weighted.covid.NO2.df$pop.weighted.NO2, na.rm = T))

Exploratory Analysis of NO2 and COVID-19 variables.

Univariate analysis of world NO2 concentrations.

#Statistics on world NO2 concentrations. 
covid.NO2.df %>%
    summarize(variable = "NO2_Concentration", mean_NO2 = mean(NO2_Concentration, na.rm = T), st_dev_cty = sd(NO2_Concentration, na.rm = T)) %>%
    pander
variable mean_NO2 st_dev_cty
NO2_Concentration 1.692e-05 1.103e-05
covid.NO2.df %>%
    summarize(variable = "NO2_Concentration",
              q0.2 = quantile(NO2_Concentration, 0.2, na.rm = T),
              q0.4 = quantile(NO2_Concentration, 0.4, na.rm = T),
              q0.6 = quantile(NO2_Concentration, 0.6, na.rm = T),
              q0.8 = quantile(NO2_Concentration, 0.8, na.rm = T)) %>%
    pander
variable q0.2 q0.4 q0.6 q0.8
NO2_Concentration 8.389e-06 1.204e-05 1.728e-05 2.352e-05
#Histogram of NO2 concentrations.
covid.NO2.df %>%
    ggplot(aes(NO2_Concentration)) +
    geom_histogram(color = "black",fill = "grey") +
    geom_vline(xintercept = mean(covid.NO2.df$NO2_Concentration), lwd = 2) +
    labs(title = "Distribution of Tropospheric NO2",
         x = "NO2 Concentration (mol/m^2)",
         y = "Number of Countries") +
    theme_minimal() +
    scale_x_continuous(breaks = seq(0,8E-5,0.5E-5)) +
    theme(axis.text.x = element_text(angle = 45,hjust=1))

Univariate analysis of world COVID statistics.

#Statistics on world COVID case statistics. 
covid.NO2.df %>%
    summarize(variable = "totcases.mil", mean_cases = mean(totcases.mil, na.rm = T), st_dev_cty = sd(totcases, na.rm = T)) %>%
    pander
variable mean_cases st_dev_cty
totcases.mil 35928 3611805
covid.NO2.df %>%
    summarize(variable = "totcases.mil",
              q0.2 = quantile(totcases.mil, 0.2, na.rm = TRUE),
              q0.4 = quantile(totcases.mil, 0.4, na.rm = TRUE),
              q0.6 = quantile(totcases.mil, 0.6, na.rm = TRUE),
              q0.8 = quantile(totcases.mil, 0.8, na.rm = TRUE)) %>%
    pander
variable q0.2 q0.4 q0.6 q0.8
totcases.mil 1662 8186 33123 73388
#Histogram of COVID case statistics.
max(covid.NO2.df$totcases.mil, na.rm = TRUE)
## [1] 179667.4
covid.NO2.df %>%
    ggplot(aes(totcases.mil)) +
    geom_histogram(color = "black",fill = "grey") +
    geom_vline(xintercept = mean(covid.NO2.df$totcases.mil, na.rm = T), lwd = 2) +
    labs(title = "Distribution of World COVID-19 Cases per Million",
         x = "Total COVID-19 Cases per Million",
         y = "Number of Countries") +
    theme_minimal() +
    scale_x_continuous(breaks = seq(0, 179667.4,10000)) +
    theme(axis.text.x = element_text(angle = 45,hjust=1))

#Statistics on world COVID death statistics. 
covid.NO2.df %>%
    summarize(variable = "totdeaths.mil", mean_cases = mean(totcases.mil, na.rm = T), st_dev_cty = sd(totcases, na.rm = T)) %>%
    pander
variable mean_cases st_dev_cty
totdeaths.mil 35928 3611805
covid.NO2.df %>%
    summarize(variable = "totdeaths.mil",
              q0.2 = quantile(totdeaths.mil, 0.2, na.rm = TRUE),
              q0.4 = quantile(totdeaths.mil, 0.4, na.rm = TRUE),
              q0.6 = quantile(totdeaths.mil, 0.6, na.rm = TRUE),
              q0.8 = quantile(totdeaths.mil, 0.8, na.rm = TRUE)) %>%
    pander
variable q0.2 q0.4 q0.6 q0.8
totdeaths.mil 27.78 132.2 536.1 1310
#Histogram of COVID statistics.
max(covid.NO2.df$totdeaths.mil, na.rm = TRUE)
## [1] 5820.087
covid.NO2.df %>%
    ggplot(aes(totdeaths.mil)) +
    geom_histogram(color = "black",fill = "grey") +
    geom_vline(xintercept = mean(covid.NO2.df$totdeaths.mil, na.rm = TRUE), lwd = 2) +
    labs(title = "Distribution of World COVID-19 Deaths per Million",
         x = "Total COVID-19 Deaths per Million",
         y = "Number of Countries") +
    theme_minimal() +
    scale_x_continuous(breaks = seq(0,5820.087,500)) +
    theme(axis.text.x = element_text(angle = 45,hjust=1))

Bivariate analysis of NO2 concentrations, COVID data, and categorical variables (super region divisions)

#Summary statistics, NO2, grouping by Super.region
covid.NO2.df %>% 
  group_by(Super.region) %>%
  summarize(mean_NO2 = mean(NO2_Concentration, na.rm = TRUE), sd_NO2 = sd(NO2_Concentration, na.rm = TRUE)) %>%
  pander
Super.region mean_NO2 sd_NO2
Central Europe, Eastern Europe, and Central Asia 2.195e-05 7.39e-06
High-income 2.472e-05 1.7e-05
Latin America and Caribbean 1.132e-05 3.362e-06
North Africa and the Middle East 2.084e-05 9.85e-06
South Asia 2.998e-05 5.271e-06
Southeast Asia, East Asia, and Oceania 1.347e-05 1.135e-05
Sub-Saharan Africa 1.7e-05 6.46e-06
NA 1.149e-05 8.931e-06
#Summary statistics, COVID cases, grouping by Super.region
covid.NO2.df %>% 
  group_by(Super.region) %>%
  summarize(mean_cases.mil = mean(totcases.mil, na.rm = TRUE), sd_cases.mil = sd(totcases.mil, na.rm = TRUE)) %>%
  pander
Super.region mean_cases.mil sd_cases.mil
Central Europe, Eastern Europe, and Central Asia 68485 39342
High-income 63327 42290
Latin America and Caribbean 30927 26009
North Africa and the Middle East 42067 39554
South Asia 11243 9764
Southeast Asia, East Asia, and Oceania 16298 41810
Sub-Saharan Africa 6235 11443
NA 41135 48023
#Summary statistics, COVID deaths, grouping by Super.region
covid.NO2.df %>% 
  group_by(Super.region) %>%
  summarize(mean_deaths.mil = mean(totdeaths.mil, na.rm = TRUE), sd_deaths.mil = sd(totdeaths.mil, na.rm = TRUE)) %>%
  pander
Super.region mean_deaths.mil sd_deaths.mil
Central Europe, Eastern Europe, and Central Asia 1450 956.1
High-income 1014 728
Latin America and Caribbean 935.3 1165
North Africa and the Middle East 466.9 379.1
South Asia 157.5 135.2
Southeast Asia, East Asia, and Oceania 112.8 178.8
Sub-Saharan Africa 105.6 192.2
NA 765.5 906.5
#Histogram, NO2, grouping by Super.region
covid.NO2.df %>%
    ggplot(aes(NO2_Concentration)) +
    geom_histogram(color = "black",fill = "grey") +
    labs(title = "Distribution of NO2 Concentrations Relative to Super Region",
         x = "NO2 Concentration (mol/m^2)",
         y = "Number of Countries") +
    theme_minimal() +
     scale_x_continuous(breaks = seq(0,8E-5,0.5E-5)) +
     scale_y_continuous(breaks = seq(0,12,2)) +
    theme(axis.text.x = element_text(angle = 45,hjust=1)) +
    facet_grid(Super.region~.)

#Histogram, COVID cases, grouping by Super.region
covid.NO2.df %>%
    ggplot(aes(totcases.mil)) +
    geom_histogram(color = "black",fill = "grey") +
    labs(title = "Distribution of COVID Cases Relative to Super Region",
         x = "COVID-19 Cases per Million",
         y = "Number of Countries") +
    theme_minimal() +
     scale_x_continuous(breaks = seq(0, 179667.4,10000)) +
     scale_y_continuous(breaks = seq(0,32,8)) +
    theme(axis.text.x = element_text(angle = 45,hjust=1)) +
    facet_grid(Super.region~.)

#Histogram, COVID deaths, grouping by Super.region
covid.NO2.df %>%
    ggplot(aes(totdeaths.mil)) +
    geom_histogram(color = "black",fill = "grey") +
    labs(title = "Distribution of COVID Deaths Relative to Super Region",
         x = "COVID-19 Deaths per Million",
         y = "Number of Countries") +
    theme_minimal() +
     scale_x_continuous(breaks = seq(0,5820.087,500)) +
     scale_y_continuous(breaks = seq(0,32,8)) +
    theme(axis.text.x = element_text(angle = 45,hjust=1)) +
    facet_grid(Super.region~.)

#Boxplot, NO2, grouping by Super.region 
covid.NO2.df %>%
    ggplot(aes(Super.region,NO2_Concentration)) +
    geom_boxplot() +
    geom_jitter(color="red", size=0.4, alpha=0.9) +
    labs(title = "Distribution of NO2 relative to Super Region",
         x = "Super Region",
         y = "NO2 Concentration (mol/m^2)") +
    theme(axis.text.x = element_text(angle = 45,hjust=1)) +
    scale_y_continuous(breaks = seq(0,8E-5,0.5E-5)) 

#Boxplot, COVID cases, grouping by Super.region 
covid.NO2.df %>%
    ggplot(aes(Super.region,totcases.mil)) +
    geom_boxplot() +
    geom_jitter(color="red", size=0.4, alpha=0.9) +
    labs(title = "Distribution of COVID Cases Relative to Super Region",
         x = "Super Region",
         y = "COVID-19 Cases per Million") +
    theme(axis.text.x = element_text(angle = 45,hjust=1)) +
    scale_y_continuous(breaks = seq(0, 179667.4,10000))

#Boxplot, COVID deaths, grouping by Super.region 
covid.NO2.df %>%
    ggplot(aes(Super.region,totdeaths.mil)) +
    geom_boxplot() +
    geom_jitter(color="red", size=0.4, alpha=0.9) +
    labs(title = "Distribution of COVID Deaths Relative to Super Region",
         x = "Super Region",
         y = "COVID-19 Deaths per Million") +
    theme(axis.text.x = element_text(angle = 45,hjust=1)) +
    scale_y_continuous(breaks = seq(0,5820.087,500))

Bivariate analysis of NO2 concentrations and all continuous variables.

covid.NO2.df %>%
    select(NO2_Concentration, totcases.mil, totdeaths.mil, case.fatality.rate, pop.density, med.age, card.death, diab.prev, fem.smokers, male.smokers) %>% 
    cor(use="pairwise.complete.obs") %>% 
    pander
Table continues below
  NO2_Concentration totcases.mil totdeaths.mil
NO2_Concentration 1 0.3165 0.2235
totcases.mil 0.3165 1 0.7135
totdeaths.mil 0.2235 0.7135 1
case.fatality.rate -0.1053 -0.1403 0.1426
pop.density 0.2045 0.0449 -0.03193
med.age 0.3898 0.5935 0.5198
card.death -0.234 -0.2975 -0.2144
diab.prev -0.2264 0.02145 -0.01058
fem.smokers 0.2739 0.5659 0.5681
male.smokers 0.04505 0.09914 0.07643
Table continues below
  case.fatality.rate pop.density med.age
NO2_Concentration -0.1053 0.2045 0.3898
totcases.mil -0.1403 0.0449 0.5935
totdeaths.mil 0.1426 -0.03193 0.5198
case.fatality.rate 1 -0.07071 -0.1202
pop.density -0.07071 1 0.1484
med.age -0.1202 0.1484 1
card.death 0.223 -0.1755 -0.3436
diab.prev 0.02785 0.01338 0.1462
fem.smokers -0.05579 -0.04709 0.6391
male.smokers -0.01647 0.0001981 0.1816
  card.death diab.prev fem.smokers male.smokers
NO2_Concentration -0.234 -0.2264 0.2739 0.04505
totcases.mil -0.2975 0.02145 0.5659 0.09914
totdeaths.mil -0.2144 -0.01058 0.5681 0.07643
case.fatality.rate 0.223 0.02785 -0.05579 -0.01647
pop.density -0.1755 0.01338 -0.04709 0.0001981
med.age -0.3436 0.1462 0.6391 0.1816
card.death 1 0.1519 -0.1427 0.4285
diab.prev 0.1519 1 0.09079 0.2061
fem.smokers -0.1427 0.09079 1 0.2261
male.smokers 0.4285 0.2061 0.2261 1
#Visual representation of correlations.
covid.NO2.df %>%
    select(NO2_Concentration, totcases.mil, totdeaths.mil, case.fatality.rate, pop.density, med.age, card.death, diab.prev, fem.smokers, male.smokers)  %>%
    cor(use="pairwise.complete.obs") %>%
    ggcorrplot(type = "lower", ggtheme = theme_minimal, colors = c("#6D9EC1","white","#E46726"),
               show.diag = T,
               lab = T, lab_size = 2, #shows correlation values and sets size of text
               title = "Correlation Matrix for the covid.NO2 dataset",
               legend.title = "Correlation Value",
               outline.color = "white",
               hc.order = T) #orders by variables most related

Bivariate graphs of NO2 concentrations and COVID-19 outcomes.

#NO2 concentration vs COVID cases. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totcases.mil)) +
  geom_point(color = "#333aff", alpha = 1, size = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
  labs(title = "World COVID-19 Cases per Million People vs. NO2 Concentration") 

cor(covid.NO2.df$totcases.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] 0.3165363
NO2.lm.cases <- lm(totcases.mil ~ NO2_Concentration, data = covid.NO2.df)
summary.lm(NO2.lm.cases)
## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = covid.NO2.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -90810 -28883 -12346  22261 137741 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.364e+04  5.741e+03   2.376   0.0185 *  
## NO2_Concentration 1.211e+09  2.660e+08   4.551 9.62e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39170 on 186 degrees of freedom
##   (64 observations deleted due to missingness)
## Multiple R-squared:  0.1002, Adjusted R-squared:  0.09536 
## F-statistic: 20.71 on 1 and 186 DF,  p-value: 9.621e-06
confint(NO2.lm.cases) #Summary statistics for linear fit. 
##                          2.5 %       97.5 %
## (Intercept)       2.313445e+03 2.496619e+04
## NO2_Concentration 6.858510e+08 1.735461e+09
#NO2 concentration vs COVID deaths. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totdeaths.mil)) +
  geom_point(color = "#333aff", alpha = 1, size = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
  labs(title = "World COVID-19 Deaths per Million People vs. NO2 Concentration") 

cor(covid.NO2.df$totdeaths.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] 0.223474
NO2.lm.deaths <- lm(totdeaths.mil ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.deaths)
## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = covid.NO2.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1484.4  -571.6  -372.9   387.4  5333.7 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       3.332e+02  1.284e+02   2.596  0.01021 * 
## NO2_Concentration 1.797e+07  5.858e+06   3.067  0.00249 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 841 on 179 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.04994,    Adjusted R-squared:  0.04463 
## F-statistic: 9.409 on 1 and 179 DF,  p-value: 0.002494
confint(NO2.lm.deaths) #Summary statistics for linear fit.
##                          2.5 %       97.5 %
## (Intercept)       7.992266e+01 5.864711e+02
## NO2_Concentration 6.409097e+06 2.952679e+07
regions.list <- c("High-income", "Latin America and Caribbean", "Sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "Southeast Asia, East Asia, and Oceania", "Central Europe, Eastern Europe, and Central Asia")
sub.regions.list <- c("High-income Asia Pacific", "High-income Australasia", "High-income North America", "High-income Southern Latin America", "High-income Western Europe", "Caribbean", "Central Latin America", "Tropical Latin America", "Andean Latin America", "Central sub-Saharan Africa", "Eastern sub-Saharan Africa", "Southern sub-Saharan Africa", "Western sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "East Asia", "Oceania", "Southeast Asia", "Central Asia", "Central Europe", "Eastern Europe")

#Function for bivariate analysis by region. 
cases.NO2 <- function(x) { 
                    covid.NO2.df %>% 
                    filter(Super.region == x) %>%
                    filter(!is.na(totdeaths.mil)) %>%
                    filter(!is.na(NO2_Concentration)) %>%
                    ggplot(aes(x = NO2_Concentration, y = totcases.mil)) +
                        geom_point(color = "#333aff", size = 1) +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
                        labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
} 
NO2.lm.cases <- function(x) {
                        covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        filter(!is.na(totdeaths.mil)) %>%
                        filter(!is.na(NO2_Concentration)) %>%
                        lm(totcases.mil ~ NO2_Concentration,.) %>%
                        summary.lm()  
} #Function for linear regression statistics to be included in for loop below. 

for (i in regions.list) {
   print(cases.NO2(i)) 
   print(NO2.lm.cases(i)) 
} #Loops function for continents of interest. 

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -78182 -33098   9082  23797 114373 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           52150      13590   3.838 0.000573 ***
## NO2_Concentration 438853382  448383204   0.979 0.335282    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42320 on 31 degrees of freedom
## Multiple R-squared:  0.02998,    Adjusted R-squared:  -0.001316 
## F-statistic: 0.9579 on 1 and 31 DF,  p-value: 0.3353

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -32144 -16754  -7505   2170  62157 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)            43205      17159   2.518   0.0183 *
## NO2_Concentration -987233958 1439547473  -0.686   0.4989  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26160 on 26 degrees of freedom
## Multiple R-squared:  0.01777,    Adjusted R-squared:  -0.02001 
## F-statistic: 0.4703 on 1 and 26 DF,  p-value: 0.4989

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -9740  -5594  -3283  -1344  48613 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)            13633       6738   2.023   0.0496 *
## NO2_Concentration -454287265  399732943  -1.136   0.2624  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11400 on 41 degrees of freedom
## Multiple R-squared:  0.03054,    Adjusted R-squared:  0.006895 
## F-statistic: 1.292 on 1 and 41 DF,  p-value: 0.2624

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -43562 -23508 -12602  19146  94637 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)            -4333      17595  -0.246   0.8081   
## NO2_Concentration 2226431334  766638920   2.904   0.0091 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33770 on 19 degrees of freedom
## Multiple R-squared:  0.3074, Adjusted R-squared:  0.271 
## F-statistic: 8.434 on 1 and 19 DF,  p-value: 0.009095

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##     1     2     3     4     5 
## -9195 -7454  9986 10206 -3543 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)            -2922      31378  -0.093    0.932
## NO2_Concentration  472550653 1034079847   0.457    0.679
## 
## Residual standard error: 10900 on 3 degrees of freedom
## Multiple R-squared:  0.06508,    Adjusted R-squared:  -0.2466 
## F-statistic: 0.2088 on 1 and 3 DF,  p-value: 0.6787

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -36541 -25947 -10363    168 121820 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        4.611e+04  1.930e+04   2.389   0.0296 *
## NO2_Concentration -1.636e+09  1.047e+09  -1.563   0.1376  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44650 on 16 degrees of freedom
## Multiple R-squared:  0.1324, Adjusted R-squared:  0.07822 
## F-statistic: 2.443 on 1 and 16 DF,  p-value: 0.1376

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -56913 -26986  -7456  21585  92022 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       7.361e+03  2.029e+04   0.363  0.71967   
## NO2_Concentration 2.772e+09  8.726e+08   3.177  0.00382 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 34030 on 26 degrees of freedom
## Multiple R-squared:  0.2796, Adjusted R-squared:  0.2519 
## F-statistic: 10.09 on 1 and 26 DF,  p-value: 0.003815
deaths.NO2 <- function(x) { 
                    covid.NO2.df %>% 
                    filter(Super.region == x) %>% 
                    filter(!is.na(totdeaths.mil)) %>%
                    filter(!is.na(NO2_Concentration)) %>%
                    ggplot(aes(x = NO2_Concentration, y = totdeaths.mil)) +
                        geom_point(color = "#333aff", size = 1) +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
                        labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
} 
NO2.lm.deaths <- function(x) {
                        covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        filter(!is.na(totdeaths.mil)) %>%
                        filter(!is.na(NO2_Concentration)) %>%
                        lm(totdeaths.mil ~ NO2_Concentration,.) %>%
                        summary.lm() #Function for linear regression statistics to be included in for loop below. 
}
for (i in regions.list) {
   print(deaths.NO2(i)) 
   print(NO2.lm.deaths(i)) 
} #Loops function for continents of interest.

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1227.49  -777.20    60.23   659.17  1133.86 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           856.1      235.1   3.641 0.000979 ***
## NO2_Concentration 6198622.6  7757195.6   0.799 0.430324    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 732.1 on 31 degrees of freedom
## Multiple R-squared:  0.02018,    Adjusted R-squared:  -0.01143 
## F-statistic: 0.6385 on 1 and 31 DF,  p-value: 0.4303

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -958.7 -603.5 -376.9  335.6 4779.2 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        1.352e+03  7.743e+02   1.746   0.0925 .
## NO2_Concentration -3.652e+07  6.496e+07  -0.562   0.5788  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1180 on 26 degrees of freedom
## Multiple R-squared:  0.01201,    Adjusted R-squared:  -0.02599 
## F-statistic: 0.3161 on 1 and 26 DF,  p-value: 0.5788

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -105.40  -86.65  -69.60  -11.17  915.43 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)            126.8      114.9   1.104    0.276
## NO2_Concentration -1301481.1  6816614.0  -0.191    0.850
## 
## Residual standard error: 194.5 on 41 degrees of freedom
## Multiple R-squared:  0.0008883,  Adjusted R-squared:  -0.02348 
## F-statistic: 0.03645 on 1 and 41 DF,  p-value: 0.8495

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -460.2 -226.6 -129.6  173.6  861.3 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       1.023e+02  1.805e+02   0.567   0.5775  
## NO2_Concentration 1.749e+07  7.864e+06   2.225   0.0384 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 346.4 on 19 degrees of freedom
## Multiple R-squared:  0.2066, Adjusted R-squared:  0.1649 
## F-statistic: 4.949 on 1 and 19 DF,  p-value: 0.03843

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##       1       2       3       4       5 
## -108.53 -143.81  122.43  149.24  -19.33 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       2.313e-01  4.400e+02   0.001    1.000
## NO2_Concentration 5.247e+06  1.450e+07   0.362    0.741
## 
## Residual standard error: 152.9 on 3 degrees of freedom
## Multiple R-squared:  0.04182,    Adjusted R-squared:  -0.2776 
## F-statistic: 0.1309 on 1 and 3 DF,  p-value: 0.7414

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -172.33 -125.39  -27.02   49.87  515.71 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        2.137e+02  7.379e+01   2.897   0.0105 *
## NO2_Concentration -6.528e+06  4.001e+06  -1.632   0.1222  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 170.7 on 16 degrees of freedom
## Multiple R-squared:  0.1427, Adjusted R-squared:  0.0891 
## F-statistic: 2.663 on 1 and 16 DF,  p-value: 0.1222

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1217.34  -503.24    13.44   411.01  1779.79 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -440.9      429.5  -1.026    0.314    
## NO2_Concentration 85772376.4 18474796.8   4.643 8.63e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 720.4 on 26 degrees of freedom
## Multiple R-squared:  0.4533, Adjusted R-squared:  0.4322 
## F-statistic: 21.55 on 1 and 26 DF,  p-value: 8.633e-05

Bivariate graphs of population weighted NO2 concentrations and COVID-19 outcomes.

#NO2 concentration vs COVID cases. 
ggplot(data = weighted.covid.NO2.df, aes(x = pop.weighted.NO2, y = totcases.mil)) +
  geom_point(color = "#333aff", alpha = 1, size = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
  labs(title = "World COVID-19 Cases per Million People vs. Weighted NO2 Concentration") 

cor(weighted.covid.NO2.df$totcases.mil, weighted.covid.NO2.df$pop.weighted.NO2, use = "complete.obs")
## [1] 0.1993439
NO2.lm.cases <- lm(totcases.mil ~ pop.weighted.NO2, data = weighted.covid.NO2.df)
summary.lm(NO2.lm.cases)
## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = weighted.covid.NO2.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -57105 -30596 -15063  26642 151847 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.782e+04  4.250e+03   6.547 5.57e-10 ***
## pop.weighted.NO2 4.406e+12  1.588e+12   2.774   0.0061 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40460 on 186 degrees of freedom
##   (64 observations deleted due to missingness)
## Multiple R-squared:  0.03974,    Adjusted R-squared:  0.03458 
## F-statistic: 7.697 on 1 and 186 DF,  p-value: 0.006095
confint(NO2.lm.cases) #Summary statistics for linear fit. 
##                         2.5 %       97.5 %
## (Intercept)      1.943685e+04 3.620427e+04
## pop.weighted.NO2 1.273021e+12 7.539258e+12
#NO2 concentration vs COVID deaths. 
ggplot(data = weighted.covid.NO2.df, aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
  geom_point(color = "#333aff", alpha = 1, size = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
  labs(title = "World COVID-19 Deaths per Million People vs. Weighted NO2 Concentration") 

cor(weighted.covid.NO2.df$totdeaths.mil, weighted.covid.NO2.df$pop.weighted.NO2, use = "complete.obs")
## [1] 0.176339
NO2.lm.deaths <- lm(totdeaths.mil ~ pop.weighted.NO2, data = weighted.covid.NO2.df,)
summary.lm(NO2.lm.deaths)
## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = weighted.covid.NO2.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1272.7  -577.4  -403.9   348.4  5236.7 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.139e+02  9.285e+01   5.534  1.1e-07 ***
## pop.weighted.NO2 8.161e+10  3.405e+10   2.397   0.0176 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 849.3 on 179 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.0311, Adjusted R-squared:  0.02568 
## F-statistic: 5.745 on 1 and 179 DF,  p-value: 0.01757
confint(NO2.lm.deaths) #Summary statistics for linear fit.
##                         2.5 %       97.5 %
## (Intercept)      3.306167e+02 6.970753e+02
## pop.weighted.NO2 1.441993e+10 1.487972e+11
regions.list <- c("High-income", "Latin America and Caribbean", "Sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "Southeast Asia, East Asia, and Oceania", "Central Europe, Eastern Europe, and Central Asia")
sub.regions.list <- c("High-income Asia Pacific", "High-income Australasia", "High-income North America", "High-income Southern Latin America", "High-income Western Europe", "Caribbean", "Central Latin America", "Tropical Latin America", "Andean Latin America", "Central sub-Saharan Africa", "Eastern sub-Saharan Africa", "Southern sub-Saharan Africa", "Western sub-Saharan Africa", "North Africa and the Middle East", "South Asia", "East Asia", "Oceania", "Southeast Asia", "Central Asia", "Central Europe", "Eastern Europe")

#Function for bivariate analysis by region. 
cases.NO2.w <- function(x) { 
                    weighted.covid.NO2.df %>% 
                    filter(Super.region == x) %>%
                    filter(!is.na(totdeaths.mil)) %>%
                    filter(!is.na(pop.weighted.NO2)) %>%
                    ggplot(aes(x = pop.weighted.NO2, y = totcases.mil)) +
                        geom_point(color = "#333aff", size = 1) +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
                        labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. Weighted NO2 Concentration"))
} 
NO2.lm.cases.w <- function(x) {
                        weighted.covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        filter(!is.na(totdeaths.mil)) %>%
                        filter(!is.na(pop.weighted.NO2)) %>%
                        lm(totcases.mil ~ pop.weighted.NO2,.) %>%
                        summary.lm()  
} #Function for linear regression statistics to be included in for loop below. 

for (i in regions.list) {
   print(cases.NO2.w(i)) 
   print(NO2.lm.cases.w(i)) 
} #Loops function for continents of interest. 

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -64931 -40260  10414  30182 112615 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       6.705e+04  1.263e+04   5.309 8.85e-06 ***
## pop.weighted.NO2 -1.385e+12  3.788e+12  -0.366    0.717    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42870 on 31 degrees of freedom
## Multiple R-squared:  0.004294,   Adjusted R-squared:  -0.02783 
## F-statistic: 0.1337 on 1 and 31 DF,  p-value: 0.7171

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -40870 -15279  -7329   5880  57138 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      2.771e+04  7.714e+03   3.592  0.00134 **
## pop.weighted.NO2 4.663e+12  6.534e+12   0.714  0.48181   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26140 on 26 degrees of freedom
## Multiple R-squared:  0.01921,    Adjusted R-squared:  -0.01851 
## F-statistic: 0.5093 on 1 and 26 DF,  p-value: 0.4818

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6736  -4698  -4014  -1864  50141 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       8.132e+03  3.848e+03   2.114   0.0407 *
## pop.weighted.NO2 -1.341e+12  2.418e+12  -0.554   0.5823  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11540 on 41 degrees of freedom
## Multiple R-squared:  0.007442,   Adjusted R-squared:  -0.01677 
## F-statistic: 0.3074 on 1 and 41 DF,  p-value: 0.5823

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -41663 -36786  -6911  24056 117570 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      3.845e+04  1.178e+04   3.263  0.00409 **
## pop.weighted.NO2 1.228e+12  2.656e+12   0.462  0.64916   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40360 on 19 degrees of freedom
## Multiple R-squared:  0.01112,    Adjusted R-squared:  -0.04093 
## F-statistic: 0.2137 on 1 and 19 DF,  p-value: 0.6492

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##       1       2       3       4       5 
## -3081.2 -1309.4 11898.8   292.2 -7800.4 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -6.558e+03  1.220e+04  -0.537    0.628
## pop.weighted.NO2  5.292e+12  3.450e+12   1.534    0.223
## 
## Residual standard error: 8440 on 3 degrees of freedom
## Multiple R-squared:  0.4396, Adjusted R-squared:  0.2528 
## F-statistic: 2.353 on 1 and 3 DF,  p-value: 0.2226

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33382 -23708 -14100  -5892 123597 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       3.482e+04  1.461e+04   2.383   0.0299 *
## pop.weighted.NO2 -9.426e+12  6.723e+12  -1.402   0.1800  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45240 on 16 degrees of freedom
## Multiple R-squared:  0.1094, Adjusted R-squared:  0.05374 
## F-statistic: 1.965 on 1 and 16 DF,  p-value: 0.18

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -56366 -25035    -83  21727  77966 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      2.417e+04  1.960e+04   1.233   0.2285  
## pop.weighted.NO2 1.522e+13  6.308e+12   2.413   0.0232 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36240 on 26 degrees of freedom
## Multiple R-squared:  0.183,  Adjusted R-squared:  0.1516 
## F-statistic: 5.823 on 1 and 26 DF,  p-value: 0.02317
deaths.NO2.w <- function(x) { 
                    weighted.covid.NO2.df %>% 
                    filter(Super.region == x) %>% 
                    filter(!is.na(totdeaths.mil)) %>%
                    filter(!is.na(pop.weighted.NO2)) %>%
                    ggplot(aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
                        geom_point(color = "#333aff", size = 1) +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "Weighted NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
                        labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Weighted NO2 Concentration"))
} 
NO2.lm.deaths.w <- function(x) {
                        weighted.covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        filter(!is.na(totdeaths.mil)) %>%
                        filter(!is.na(pop.weighted.NO2)) %>%
                        lm(totdeaths.mil ~ pop.weighted.NO2,.) %>%
                        summary.lm() #Function for linear regression statistics to be included in for loop below. 
}
for (i in regions.list) {
   print(deaths.NO2.w(i)) 
   print(NO2.lm.deaths.w(i)) 
} #Loops function for continents of interest.

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1053.79  -840.99    32.76   668.09  1157.04 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      9.185e+02  2.168e+02   4.236 0.000189 ***
## pop.weighted.NO2 3.551e+10  6.503e+10   0.546 0.588907    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 736.1 on 31 degrees of freedom
## Multiple R-squared:  0.009528,   Adjusted R-squared:  -0.02242 
## F-statistic: 0.2982 on 1 and 31 DF,  p-value: 0.5889

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1334.0  -611.0  -308.5   285.3  4895.3 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      7.607e+02  3.476e+02   2.188   0.0378 *
## pop.weighted.NO2 1.926e+11  2.944e+11   0.654   0.5188  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1178 on 26 degrees of freedom
## Multiple R-squared:  0.01619,    Adjusted R-squared:  -0.02165 
## F-statistic: 0.4278 on 1 and 26 DF,  p-value: 0.5188

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -149.73 -103.62  -58.94   -6.57  808.58 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)      4.223e+01  6.391e+01   0.661    0.512
## pop.weighted.NO2 4.480e+10  4.017e+10   1.115    0.271
## 
## Residual standard error: 191.7 on 41 degrees of freedom
## Multiple R-squared:  0.02945,    Adjusted R-squared:  0.005776 
## F-statistic: 1.244 on 1 and 41 DF,  p-value: 0.2712

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -425.56 -322.47  -72.51  285.37  788.37 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      4.127e+02  1.120e+02   3.684  0.00158 **
## pop.weighted.NO2 1.839e+10  2.525e+10   0.728  0.47528   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 383.6 on 19 degrees of freedom
## Multiple R-squared:  0.02716,    Adjusted R-squared:  -0.02404 
## F-statistic: 0.5305 on 1 and 19 DF,  p-value: 0.4753

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##      1      2      3      4      5 
## -27.14 -39.47 149.50 -11.88 -71.01 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -1.287e+02  1.442e+02  -0.893    0.438
## pop.weighted.NO2  8.508e+10  4.076e+10   2.087    0.128
## 
## Residual standard error: 99.72 on 3 degrees of freedom
## Multiple R-squared:  0.5922, Adjusted R-squared:  0.4563 
## F-statistic: 4.357 on 1 and 3 DF,  p-value: 0.1281

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -152.68 -121.56  -54.04   75.22  526.69 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       1.648e+02  5.641e+01   2.921    0.010 **
## pop.weighted.NO2 -3.499e+10  2.596e+10  -1.348    0.196   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 174.7 on 16 degrees of freedom
## Multiple R-squared:  0.102,  Adjusted R-squared:  0.04587 
## F-statistic: 1.817 on 1 and 16 DF,  p-value: 0.1964

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1291.22  -692.02   -88.65   581.03  1886.11 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      4.747e+02  4.858e+02   0.977   0.3374  
## pop.weighted.NO2 3.351e+11  1.563e+11   2.143   0.0416 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 898.2 on 26 degrees of freedom
## Multiple R-squared:  0.1502, Adjusted R-squared:  0.1175 
## F-statistic: 4.594 on 1 and 26 DF,  p-value: 0.04162

Bivariate graphs of NO2 concentrations and other continuous variables.

 #NO2 concentration vs population density. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = pop.density)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Population Density") +
  labs(title = "World Population Density vs. NO2 Concentration") 

cor(covid.NO2.df$pop.density, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] 0.2045035
NO2.lm.pop <- lm(pop.density ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.pop)
## 
## Call:
## lm(formula = pop.density ~ NO2_Concentration, data = covid.NO2.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2374.0  -516.0  -231.7    -3.8 19313.8 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)           -281.4      293.8  -0.958  0.33933   
## NO2_Concentration 40668664.8 13904464.2   2.925  0.00385 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2095 on 196 degrees of freedom
##   (54 observations deleted due to missingness)
## Multiple R-squared:  0.04182,    Adjusted R-squared:  0.03693 
## F-statistic: 8.555 on 1 and 196 DF,  p-value: 0.003852
confint(NO2.lm.pop) #Summary statistics for linear fit. 
##                           2.5 %       97.5 %
## (Intercept)           -860.8002 2.979961e+02
## NO2_Concentration 13247097.7417 6.809023e+07
#NO2 concentration vs median age. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = med.age)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Median Age") +
  labs(title = "World Median Age by Country vs. NO2 Concentration") 

cor(covid.NO2.df$med.age, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] 0.3897704
NO2.lm.age <- lm(med.age ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.age)
## 
## Call:
## lm(formula = med.age ~ NO2_Concentration, data = covid.NO2.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.2819  -7.6082   0.2393   6.8515  14.9838 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.437e+01  1.237e+00   19.70  < 2e-16 ***
## NO2_Concentration 3.281e+05  5.746e+04    5.71 4.53e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.4 on 182 degrees of freedom
##   (68 observations deleted due to missingness)
## Multiple R-squared:  0.1519, Adjusted R-squared:  0.1473 
## F-statistic:  32.6 on 1 and 182 DF,  p-value: 4.528e-08
confint(NO2.lm.age) #Summary statistics for linear fit.
##                          2.5 %       97.5 %
## (Intercept)           21.92602     26.80561
## NO2_Concentration 214701.10076 441432.36905
#NO2 concentration vs GDP per capita. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = gdp.cap)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "GDP per Capita") +
  labs(title = "World GDP per Capita vs. NO2 Concentration") 

cor(covid.NO2.df$gdp.cap, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] 0.3524915
NO2.lm.gdp <- lm(gdp.cap ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.gdp)
## 
## Call:
## lm(formula = gdp.cap ~ NO2_Concentration, data = covid.NO2.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -30294 -13965  -5412   6907  88128 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            6948       2851   2.437   0.0158 *  
## NO2_Concentration 680456994  133541576   5.095 8.62e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19450 on 183 degrees of freedom
##   (67 observations deleted due to missingness)
## Multiple R-squared:  0.1243, Adjusted R-squared:  0.1195 
## F-statistic: 25.96 on 1 and 183 DF,  p-value: 8.62e-07
confint(NO2.lm.gdp) #Summary statistics for linear fit.
##                          2.5 %       97.5 %
## (Intercept)       1.322168e+03     12573.53
## NO2_Concentration 4.169779e+08 943936113.77
#NO2 concentration vs cardiovascular death. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = card.death)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Cardiovascular Death Rates") +
  labs(title = "World Cardiovascular Death Rates vs. NO2 Concentration") 

cor(covid.NO2.df$card.death, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] -0.2339947
NO2.lm.card.death <- lm(card.death ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.card.death)
## 
## Call:
## lm(formula = card.death ~ NO2_Concentration, data = covid.NO2.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -204.00  -91.69  -20.65   69.54  462.64 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.124e+02  1.746e+01  17.894  < 2e-16 ***
## NO2_Concentration -2.656e+06  8.180e+05  -3.247  0.00139 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 119.5 on 182 degrees of freedom
##   (68 observations deleted due to missingness)
## Multiple R-squared:  0.05475,    Adjusted R-squared:  0.04956 
## F-statistic: 10.54 on 1 and 182 DF,  p-value: 0.001389
confint(NO2.lm.card.death) #Summary statistics for linear fit.
##                           2.5 %        97.5 %
## (Intercept)            277.9489      346.8407
## NO2_Concentration -4270000.3363 -1041995.4937
#NO2 concentration vs diabetes prevalence. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = diab.prev)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Diabetes Prevalence Rates") +
  labs(title = "World Diabetes Prevalence Rates vs. NO2 Concentration") 

cor(covid.NO2.df$diab.prev, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] -0.2263737
NO2.lm.diab <- lm(diab.prev ~ NO2_Concentration, data = covid.NO2.df,)
summary.lm(NO2.lm.diab)
## 
## Call:
## lm(formula = diab.prev ~ NO2_Concentration, data = covid.NO2.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9664 -2.9376 -0.6631  2.3314 20.9495 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.014e+01  6.546e-01  15.485  < 2e-16 ***
## NO2_Concentration -9.990e+04  3.110e+04  -3.212  0.00155 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.623 on 191 degrees of freedom
##   (59 observations deleted due to missingness)
## Multiple R-squared:  0.05125,    Adjusted R-squared:  0.04628 
## F-statistic: 10.32 on 1 and 191 DF,  p-value: 0.001547
confint(NO2.lm.diab) #Summary statistics for linear fit.
##                           2.5 %       97.5 %
## (Intercept)        8.845719e+00     11.42813
## NO2_Concentration -1.612537e+05 -38551.92814

Multivariate graphs of COVID outcomes vs NO2 concentrations.

#NO2 concentration vs COVID cases. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totcases.mil)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
  labs(title = "World COVID-19 Cases per Million People vs. NO2 Concentration") 

cor(covid.NO2.df$totcases.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] 0.3165363
NO2.lm.cases <- lm(totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + med.age, data = covid.NO2.df)
summary.lm(NO2.lm.cases)
## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = covid.NO2.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -78822 -14001  -4366   9912 118443 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -4.057e+04  9.193e+03  -4.413 1.80e-05 ***
## NO2_Concentration  2.755e+08  2.418e+08   1.139   0.2563    
## gdp.cap            2.713e-01  1.633e-01   1.661   0.0986 .  
## pop.density       -7.380e+00  3.021e+00  -2.443   0.0156 *  
## med.age            2.239e+03  3.517e+02   6.367 1.71e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31160 on 171 degrees of freedom
##   (76 observations deleted due to missingness)
## Multiple R-squared:  0.3929, Adjusted R-squared:  0.3787 
## F-statistic: 27.67 on 4 and 171 DF,  p-value: < 2.2e-16
confint(NO2.lm.cases) #Summary statistics for linear fit. 
##                           2.5 %        97.5 %
## (Intercept)       -5.871916e+04 -2.242532e+04
## NO2_Concentration -2.018993e+08  7.528626e+08
## gdp.cap           -5.116587e-02  5.936683e-01
## pop.density       -1.334335e+01 -1.416295e+00
## med.age            1.545231e+03  2.933764e+03
#NO2 concentration vs COVID deaths. 
ggplot(data = covid.NO2.df, aes(x = NO2_Concentration, y = totdeaths.mil)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
  labs(title = "World COVID-19 Deaths per Million People vs. NO2 Concentration") 

cor(covid.NO2.df$totdeaths.mil, covid.NO2.df$NO2_Concentration, use = "complete.obs") 
## [1] 0.223474
NO2.lm.deaths <- lm(totdeaths.mil ~ NO2_Concentration + gdp.cap  + pop.density + med.age, data = covid.NO2.df,)
summary.lm(NO2.lm.deaths)
## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = covid.NO2.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1545.1  -361.5   -88.4   211.0  5187.3 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -1.114e+03  2.111e+02  -5.276 4.06e-07 ***
## NO2_Concentration  4.054e+06  5.544e+06   0.731   0.4657    
## gdp.cap           -8.090e-03  3.700e-03  -2.187   0.0302 *  
## pop.density       -1.727e-01  6.841e-02  -2.524   0.0125 *  
## med.age            6.239e+01  7.971e+00   7.827 5.46e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 705.4 on 167 degrees of freedom
##   (80 observations deleted due to missingness)
## Multiple R-squared:  0.3394, Adjusted R-squared:  0.3235 
## F-statistic: 21.45 on 4 and 167 DF,  p-value: 2.72e-14
confint(NO2.lm.deaths) #Summary statistics for linear fit.
##                           2.5 %        97.5 %
## (Intercept)       -1.530810e+03 -6.970901e+02
## NO2_Concentration -6.890922e+06  1.499838e+07
## gdp.cap           -1.539412e-02 -7.854305e-04
## pop.density       -3.077398e-01 -3.761957e-02
## med.age            4.665229e+01  7.812782e+01
#Function for multivariate analysis by Super.region. 
cases.NO2 <- function(x) { 
                    covid.NO2.df %>% 
                    filter(Super.region == x) %>%
                    ggplot(aes(x = NO2_Concentration, y = totcases.mil)) +
                        geom_point(color = "#333aff") +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
                        labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
} 
NO2.lm.cases.reg <- function(x) {
                        covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        lm(totcases.mil ~ NO2_Concentration + gdp.cap  + pop.density + med.age,.) %>%
                        summary.lm()  
} #Function for linear regression statistics to be included in for loop below. 

for (i in regions.list) {
   print(cases.NO2(i)) 
   print(NO2.lm.cases.reg(i)) 
} #Loops function for continents of interest. 

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -80918 -20637  11757  21503  56252 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        1.415e+05  6.915e+04   2.046   0.0506 .
## NO2_Concentration  6.753e+08  4.508e+08   1.498   0.1457  
## gdp.cap           -2.958e-01  4.276e-01  -0.692   0.4950  
## pop.density       -3.902e+00  5.323e+00  -0.733   0.4699  
## med.age           -2.064e+03  1.714e+03  -1.204   0.2391  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37200 on 27 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1368, Adjusted R-squared:  0.008908 
## F-statistic:  1.07 on 4 and 27 DF,  p-value: 0.3908

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -36446  -9842     98  11484  54489 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -1.364e+05  6.274e+04  -2.174 0.040781 *  
## NO2_Concentration  2.462e+09  1.499e+09   1.642 0.114759    
## gdp.cap           -1.129e+00  9.615e-01  -1.174 0.252926    
## pop.density       -1.453e+02  3.437e+01  -4.228 0.000346 ***
## med.age            5.968e+03  2.104e+03   2.837 0.009595 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20580 on 22 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.4785, Adjusted R-squared:  0.3837 
## F-statistic: 5.046 on 4 and 22 DF,  p-value: 0.004862

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -12928  -5002   -554   2599  30947 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -4.130e+04  1.378e+04  -2.996  0.00486 ** 
## NO2_Concentration -4.610e+08  3.222e+08  -1.431  0.16091    
## gdp.cap            2.543e-01  3.588e-01   0.709  0.48300    
## pop.density       -5.393e+00  1.110e+01  -0.486  0.62984    
## med.age            2.794e+03  6.085e+02   4.592 4.94e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7897 on 37 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.5783, Adjusted R-squared:  0.5327 
## F-statistic: 12.69 on 4 and 37 DF,  p-value: 1.35e-06

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28226 -10085  -1834   7016  51322 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -2.839e+04  2.766e+04  -1.026 0.320947    
## NO2_Concentration  6.601e+08  5.310e+08   1.243 0.232937    
## gdp.cap            3.284e-01  1.977e-01   1.661 0.117453    
## pop.density        5.675e+01  1.162e+01   4.886 0.000198 ***
## med.age            1.318e+03  1.058e+03   1.245 0.232072    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20020 on 15 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7965, Adjusted R-squared:  0.7423 
## F-statistic: 14.68 on 4 and 15 DF,  p-value: 4.54e-05

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -2.033e+06        NaN     NaN      NaN
## NO2_Concentration -8.654e+10        NaN     NaN      NaN
## gdp.cap           -1.565e+02        NaN     NaN      NaN
## pop.density        1.933e+02        NaN     NaN      NaN
## med.age            2.022e+05        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 4 and 0 DF,  p-value: NA

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -69519  -7960  -2246   4542  88232 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        1.202e+04  4.154e+04   0.289   0.7761  
## NO2_Concentration -1.175e+09  1.168e+09  -1.006   0.3293  
## gdp.cap            3.167e+00  1.454e+00   2.178   0.0447 *
## pop.density        5.813e+01  2.509e+01   2.317   0.0341 *
## med.age           -8.473e+02  2.107e+03  -0.402   0.6929  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31640 on 16 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.5774, Adjusted R-squared:  0.4718 
## F-statistic: 5.465 on 4 and 16 DF,  p-value: 0.005715

## 
## Call:
## lm(formula = totcases.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -32404 -21359  -7186  14258  90747 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -8.667e+04  4.108e+04  -2.110   0.0460 *
## NO2_Concentration  8.479e+08  1.654e+09   0.513   0.6130  
## gdp.cap            5.136e-01  9.481e-01   0.542   0.5932  
## pop.density        1.954e+01  2.755e+02   0.071   0.9441  
## med.age            3.267e+03  1.374e+03   2.377   0.0262 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30220 on 23 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4975, Adjusted R-squared:  0.4101 
## F-statistic: 5.693 on 4 and 23 DF,  p-value: 0.002456
deaths.NO2 <- function(x) { 
                    covid.NO2.df %>% 
                    filter(Super.region == x) %>%
                    ggplot(aes(x = NO2_Concentration, y = totdeaths.mil)) +
                        geom_point(color = "#333aff") +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
                        labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. NO2 Concentration"))
} 
NO2.lm.deaths.reg <- function(x) {
                        covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        lm(totdeaths.mil ~ NO2_Concentration + gdp.cap  + pop.density + med.age,.) %>%
                        summary.lm() #Function for linear regression statistics to be included in for loop below. 
}
for (i in regions.list) {
   print(deaths.NO2(i)) 
   print(NO2.lm.deaths.reg(i)) 
} #Loops function for continents of interest.

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1311.58  -566.69     9.46   532.01  1056.38 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)        1.027e+03  1.346e+03   0.762    0.452
## NO2_Concentration  5.949e+06  8.777e+06   0.678    0.504
## gdp.cap           -1.140e-02  8.326e-03  -1.369    0.182
## pop.density       -7.383e-02  1.036e-01  -0.712    0.482
## med.age            8.725e+00  3.338e+01   0.261    0.796
## 
## Residual standard error: 724.2 on 27 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1443, Adjusted R-squared:  0.01751 
## F-statistic: 1.138 on 4 and 27 DF,  p-value: 0.3597

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1308.9  -446.6   -12.6   258.7  4441.0 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -2.529e+03  3.356e+03  -0.753   0.4592  
## NO2_Concentration  4.585e+07  8.021e+07   0.572   0.5733  
## gdp.cap           -3.965e-02  5.143e-02  -0.771   0.4490  
## pop.density       -4.825e+00  1.839e+00  -2.624   0.0155 *
## med.age            1.417e+02  1.125e+02   1.259   0.2212  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1101 on 22 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.2587, Adjusted R-squared:  0.1239 
## F-statistic: 1.919 on 4 and 22 DF,  p-value: 0.1428

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -245.62  -64.36  -21.13   51.06  483.56 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -9.329e+02  2.373e+02  -3.931 0.000357 ***
## NO2_Concentration  1.203e+06  5.549e+06   0.217 0.829502    
## gdp.cap            1.500e-03  6.179e-03   0.243 0.809558    
## pop.density       -4.707e-02  1.911e-01  -0.246 0.806802    
## med.age            5.193e+01  1.048e+01   4.956 1.61e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 136 on 37 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.5583, Adjusted R-squared:  0.5105 
## F-statistic: 11.69 on 4 and 37 DF,  p-value: 3.087e-06

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -386.42 -167.92  -62.21  131.12  585.64 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -7.698e+02  4.065e+02  -1.894   0.0777 .
## NO2_Concentration  1.645e+07  7.804e+06   2.108   0.0522 .
## gdp.cap           -8.021e-03  2.906e-03  -2.760   0.0146 *
## pop.density        1.067e-01  1.707e-01   0.625   0.5413  
## med.age            3.954e+01  1.556e+01   2.542   0.0226 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 294.3 on 15 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5256, Adjusted R-squared:  0.3991 
## F-statistic: 4.154 on 4 and 15 DF,  p-value: 0.01841

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -2.834e+04        NaN     NaN      NaN
## NO2_Concentration -1.223e+09        NaN     NaN      NaN
## gdp.cap           -2.207e+00        NaN     NaN      NaN
## pop.density        2.771e+00        NaN     NaN      NaN
## med.age            2.841e+03        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 4 and 0 DF,  p-value: NA

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -284.60  -51.08    7.73   68.13  335.99 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)        7.906e+01  2.084e+02   0.379    0.711
## NO2_Concentration -7.100e+06  6.049e+06  -1.174    0.263
## gdp.cap            1.282e-02  7.330e-03   1.749    0.106
## pop.density        9.886e-02  1.210e-01   0.817    0.430
## med.age           -1.136e+00  1.019e+01  -0.112    0.913
## 
## Residual standard error: 150.4 on 12 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.4931, Adjusted R-squared:  0.3242 
## F-statistic: 2.919 on 4 and 12 DF,  p-value: 0.06712

## 
## Call:
## lm(formula = totdeaths.mil ~ NO2_Concentration + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1333.94  -396.57     5.37   308.10  1184.40 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       -3.037e+03  8.427e+02  -3.603  0.00150 **
## NO2_Concentration  4.754e+07  3.392e+07   1.401  0.17447   
## gdp.cap           -1.507e-02  1.945e-02  -0.775  0.44625   
## pop.density        7.823e-01  5.651e+00   0.138  0.89111   
## med.age            9.481e+01  2.819e+01   3.363  0.00269 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 619.8 on 23 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.642,  Adjusted R-squared:  0.5797 
## F-statistic: 10.31 on 4 and 23 DF,  p-value: 6.214e-05

Multivariate graphs of COVID outcomes vs weighted NO2 concentrations.

#Weighted NO2 concentration vs COVID cases. 
  ggplot(data = weighted.covid.NO2.df, aes(x = pop.weighted.NO2, y = totcases.mil)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
  labs(title = "World COVID-19 Cases per Million People vs. Population Weighted NO2 Concentration") 

cor(weighted.covid.NO2.df$totcases.mil, weighted.covid.NO2.df$pop.weighted.NO2, use = "complete.obs") 
## [1] 0.1993439
NO2.lm.cases <- lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = weighted.covid.NO2.df)
summary.lm(NO2.lm.cases)
## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = weighted.covid.NO2.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -77235 -14971  -4383   9250 121334 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -3.896e+04  9.102e+03  -4.280 3.10e-05 ***
## pop.weighted.NO2  4.978e+11  1.386e+12   0.359   0.7200    
## gdp.cap           2.795e-01  1.658e-01   1.686   0.0937 .  
## pop.density      -6.937e+00  3.105e+00  -2.234   0.0268 *  
## med.age           2.314e+03  3.481e+02   6.647 3.86e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31260 on 171 degrees of freedom
##   (76 observations deleted due to missingness)
## Multiple R-squared:  0.3888, Adjusted R-squared:  0.3745 
## F-statistic: 27.19 on 4 and 171 DF,  p-value: < 2.2e-16
confint(NO2.lm.cases) #Summary statistics for linear fit. 
##                          2.5 %        97.5 %
## (Intercept)      -5.692563e+04 -2.099168e+04
## pop.weighted.NO2 -2.238674e+12  3.234279e+12
## gdp.cap          -4.779897e-02  6.068672e-01
## pop.density      -1.306547e+01 -8.078500e-01
## med.age           1.626715e+03  3.001059e+03
#Weighted NO2 concentration vs COVID deaths. 
ggplot(data = weighted.covid.NO2.df, aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
  labs(title = "World COVID-19 Deaths per Million People vs. Population Weighted NO2 Concentration") 

cor(weighted.covid.NO2.df$totdeaths.mil, weighted.covid.NO2.df$pop.weighted.NO2, use = "complete.obs") 
## [1] 0.176339
NO2.lm.deaths <- lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age, data = weighted.covid.NO2.df,)
summary.lm(NO2.lm.deaths)
## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = weighted.covid.NO2.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1538.1  -355.5   -80.0   207.9  5168.1 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -1.092e+03  2.079e+02  -5.254 4.49e-07 ***
## pop.weighted.NO2  1.679e+10  3.155e+10   0.532   0.5953    
## gdp.cap          -8.183e-03  3.746e-03  -2.185   0.0303 *  
## pop.density      -1.614e-01  7.013e-02  -2.302   0.0226 *  
## med.age           6.303e+01  7.875e+00   8.004 1.94e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 705.9 on 167 degrees of freedom
##   (80 observations deleted due to missingness)
## Multiple R-squared:  0.3384, Adjusted R-squared:  0.3225 
## F-statistic: 21.35 on 4 and 167 DF,  p-value: 3.075e-14
confint(NO2.lm.deaths) #Summary statistics for linear fit.
##                          2.5 %        97.5 %
## (Intercept)      -1.502670e+03 -6.817902e+02
## pop.weighted.NO2 -4.550259e+10  7.908621e+10
## gdp.cap          -1.557841e-02 -7.881128e-04
## pop.density      -2.998430e-01 -2.295015e-02
## med.age           4.748233e+01  7.857581e+01
#Function for multivariate analysis by Super.region. 
cases.NO2 <- function(x) { 
                    weighted.covid.NO2.df %>% 
                    filter(Super.region == x) %>%
                    ggplot(aes(x = pop.weighted.NO2, y = totcases.mil)) +
                        geom_point(color = "#333aff") +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
                        labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
} 
NO2.lm.cases.reg <- function(x) {
                        weighted.covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age,.) %>%
                        summary.lm()  
} #Function for linear regression statistics to be included in for loop below. 

for (i in regions.list) {
   print(cases.NO2(i)) 
   print(NO2.lm.cases.reg(i)) 
} #Loops function for continents of interest. 

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -63683 -26278  12377  24428  65629 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       1.102e+05  6.927e+04   1.591    0.123
## pop.weighted.NO2  1.121e+12  3.965e+12   0.283    0.780
## gdp.cap          -2.249e-01  4.425e-01  -0.508    0.615
## pop.density      -3.908e+00  5.711e+00  -0.684    0.500
## med.age          -1.030e+03  1.711e+03  -0.602    0.552
## 
## Residual standard error: 38650 on 27 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.06781,    Adjusted R-squared:  -0.0703 
## F-statistic: 0.491 on 4 and 27 DF,  p-value: 0.7423

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -39068  -9133   1164   9581  48875 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -1.157e+05  4.694e+04  -2.465 0.021973 *  
## pop.weighted.NO2  1.249e+13  5.813e+12   2.148 0.042948 *  
## gdp.cap          -1.188e+00  9.161e-01  -1.297 0.208219    
## pop.density      -1.291e+02  2.959e+01  -4.364 0.000248 ***
## med.age           5.776e+03  1.835e+03   3.147 0.004680 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19820 on 22 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.5161, Adjusted R-squared:  0.4281 
## F-statistic: 5.865 on 4 and 22 DF,  p-value: 0.002276

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -12682  -4271  -1596   3197  32440 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -5.181e+04  1.213e+04  -4.272  0.00013 ***
## pop.weighted.NO2 -7.071e+11  1.859e+12  -0.380  0.70582    
## gdp.cap           1.171e-01  3.547e-01   0.330  0.74315    
## pop.density      -3.858e+00  1.158e+01  -0.333  0.74092    
## med.age           3.013e+03  6.075e+02   4.960  1.6e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8096 on 37 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.5567, Adjusted R-squared:  0.5088 
## F-statistic: 11.62 on 4 and 37 DF,  p-value: 3.286e-06

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -30024 -10681   -136  11610  50896 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -2.071e+04  2.837e+04  -0.730    0.477    
## pop.weighted.NO2  1.822e+11  1.474e+12   0.124    0.903    
## gdp.cap           3.569e-01  2.139e-01   1.668    0.116    
## pop.density       6.330e+01  1.093e+01   5.790 3.57e-05 ***
## med.age           1.434e+03  1.107e+03   1.295    0.215    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21020 on 15 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7758, Adjusted R-squared:  0.716 
## F-statistic: 12.98 on 4 and 15 DF,  p-value: 9.189e-05

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -1.913e+05        NaN     NaN      NaN
## pop.weighted.NO2  2.886e+13        NaN     NaN      NaN
## gdp.cap           1.277e+01        NaN     NaN      NaN
## pop.density       3.630e+01        NaN     NaN      NaN
## med.age           8.563e+02        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 4 and 0 DF,  p-value: NA

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -72601  -8795   1065   6032  84944 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       9.763e+03  4.106e+04   0.238   0.8151  
## pop.weighted.NO2 -6.330e+12  5.451e+12  -1.161   0.2626  
## gdp.cap           3.498e+00  1.362e+00   2.568   0.0206 *
## pop.density       5.596e+01  2.514e+01   2.226   0.0407 *
## med.age          -1.111e+03  1.861e+03  -0.597   0.5587  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31330 on 16 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.5856, Adjusted R-squared:  0.482 
## F-statistic: 5.653 on 4 and 16 DF,  p-value: 0.004944

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -36674 -20261  -6478  15784  91154 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -8.948e+04  4.153e+04  -2.155   0.0419 *
## pop.weighted.NO2  2.303e+12  6.795e+12   0.339   0.7377  
## gdp.cap           5.984e-01  9.407e-01   0.636   0.5310  
## pop.density       1.212e+02  1.666e+02   0.727   0.4743  
## med.age           3.421e+03  1.332e+03   2.568   0.0172 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30310 on 23 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4943, Adjusted R-squared:  0.4064 
## F-statistic: 5.621 on 4 and 23 DF,  p-value: 0.002629
deaths.NO2 <- function(x) { 
                    weighted.covid.NO2.df %>% 
                    filter(Super.region == x) %>%
                    ggplot(aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
                        geom_point(color = "#333aff") +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
                        labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
} 
NO2.lm.deaths.reg <- function(x) {
                        weighted.covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age,.) %>%
                        summary.lm() #Function for linear regression statistics to be included in for loop below. 
}
for (i in regions.list) {
   print(deaths.NO2(i)) 
   print(NO2.lm.deaths.reg(i)) 
} #Loops function for continents of interest.

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1110.2  -598.4     2.3   522.0  1135.9 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       7.560e+02  1.308e+03   0.578    0.568
## pop.weighted.NO2  1.107e+10  7.489e+10   0.148    0.884
## gdp.cap          -1.077e-02  8.357e-03  -1.288    0.209
## pop.density      -7.344e-02  1.079e-01  -0.681    0.502
## med.age           1.762e+01  3.231e+01   0.545    0.590
## 
## Residual standard error: 730.1 on 27 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1304, Adjusted R-squared:  0.001597 
## F-statistic: 1.012 on 4 and 27 DF,  p-value: 0.4185

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1363.0  -363.4  -183.6   225.8  4360.9 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -2.693e+03  2.557e+03  -1.053  0.30367   
## pop.weighted.NO2  3.474e+11  3.166e+11   1.097  0.28446   
## gdp.cap          -4.590e-02  4.990e-02  -0.920  0.36764   
## pop.density      -4.620e+00  1.612e+00  -2.866  0.00897 **
## med.age           1.558e+02  9.998e+01   1.558  0.13340   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1080 on 22 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.2867, Adjusted R-squared:  0.157 
## F-statistic: 2.211 on 4 and 22 DF,  p-value: 0.101

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -216.20  -63.95  -15.86   42.52  349.34 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -1.099e+03  1.895e+02  -5.801 1.17e-06 ***
## pop.weighted.NO2  7.003e+10  2.904e+10   2.412    0.021 *  
## gdp.cap          -3.397e-04  5.541e-03  -0.061    0.951    
## pop.density       5.389e-02  1.809e-01   0.298    0.767    
## med.age           5.613e+01  9.491e+00   5.914 8.21e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 126.5 on 37 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.6178, Adjusted R-squared:  0.5765 
## F-statistic: 14.95 on 4 and 37 DF,  p-value: 2.328e-07

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -474.34 -169.86  -21.34  173.70  604.48 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -6.318e+02  4.240e+02  -1.490   0.1570  
## pop.weighted.NO2  3.167e+10  2.202e+10   1.438   0.1709  
## gdp.cap          -8.418e-03  3.197e-03  -2.633   0.0188 *
## pop.density       2.847e-01  1.634e-01   1.742   0.1019  
## med.age           4.244e+01  1.654e+01   2.566   0.0215 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 314.1 on 15 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4595, Adjusted R-squared:  0.3154 
## F-statistic: 3.188 on 4 and 15 DF,  p-value: 0.04403

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -2.317e+03        NaN     NaN      NaN
## pop.weighted.NO2  4.078e+11        NaN     NaN      NaN
## gdp.cap           1.846e-01        NaN     NaN      NaN
## pop.density       5.531e-01        NaN     NaN      NaN
## med.age          -3.957e+00        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 4 and 0 DF,  p-value: NA

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -295.53  -74.64  -10.78   52.95  324.09 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       6.662e+01  2.099e+02   0.317   0.7565  
## pop.weighted.NO2 -3.257e+10  2.673e+10  -1.219   0.2464  
## gdp.cap           1.529e-02  6.695e-03   2.284   0.0414 *
## pop.density       9.516e-02  1.211e-01   0.786   0.4473  
## med.age          -3.381e+00  9.041e+00  -0.374   0.7149  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 149.8 on 12 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.4972, Adjusted R-squared:  0.3296 
## F-statistic: 2.966 on 4 and 12 DF,  p-value: 0.06438

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1125.3  -391.5  -114.2   302.0  1216.0 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -3.104e+03  8.845e+02  -3.510  0.00188 **
## pop.weighted.NO2  1.675e+10  1.447e+11   0.116  0.90887   
## gdp.cap          -3.280e-03  2.004e-02  -0.164  0.87139   
## pop.density       7.099e+00  3.548e+00   2.001  0.05732 . 
## med.age           1.052e+02  2.837e+01   3.707  0.00116 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 645.5 on 23 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6116, Adjusted R-squared:  0.5441 
## F-statistic: 9.056 on 4 and 23 DF,  p-value: 0.0001518
fatality.rate.NO2 <- function(x) { 
                    weighted.covid.NO2.df %>% 
                    filter(Super.region == x) %>%
                    ggplot(aes(x = pop.weighted.NO2, y = case.fatality.rate)) +
                        geom_point(color = "#333aff") +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
                        labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
} 
NO2.lm.fatality.reg <- function(x) {
                        weighted.covid.NO2.df %>%
                        filter(Super.region == x) %>%
                        lm(case.fatality.rate ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age,.) %>%
                        summary.lm() #Function for linear regression statistics to be included in for loop below. 
}
for (i in regions.list) {
   print(fatality.rate.NO2(i)) 
   print(NO2.lm.fatality.reg(i)) 
} #Loops function for continents of interest.

## 
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + 
##     pop.density + med.age, data = .)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0110282 -0.0041976  0.0000133  0.0047031  0.0147586 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -1.920e-03  1.209e-02  -0.159   0.8750  
## pop.weighted.NO2 -2.468e+04  6.923e+05  -0.036   0.9718  
## gdp.cap          -1.234e-07  7.726e-08  -1.597   0.1219  
## pop.density      -1.622e-06  9.971e-07  -1.627   0.1154  
## med.age           5.934e-04  2.987e-04   1.987   0.0572 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006749 on 27 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.3271, Adjusted R-squared:  0.2274 
## F-statistic: 3.281 on 4 and 27 DF,  p-value: 0.02577

## 
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + 
##     pop.density + med.age, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.022873 -0.008812 -0.005646  0.003473  0.060924 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       3.035e-02  5.049e-02   0.601    0.554
## pop.weighted.NO2  4.130e+06  6.252e+06   0.661    0.516
## gdp.cap           7.909e-09  9.854e-07   0.008    0.994
## pop.density      -4.125e-05  3.182e-05  -1.296    0.208
## med.age          -5.371e-06  1.974e-03  -0.003    0.998
## 
## Residual standard error: 0.02132 on 22 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1354, Adjusted R-squared:  -0.02178 
## F-statistic: 0.8614 on 4 and 22 DF,  p-value: 0.5023

## 
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + 
##     pop.density + med.age, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.016674 -0.007111 -0.001484  0.007006  0.022902 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       2.306e-02  1.541e-02   1.497    0.143
## pop.weighted.NO2  8.546e+05  2.361e+06   0.362    0.719
## gdp.cap          -3.823e-07  4.505e-07  -0.849    0.402
## pop.density      -1.137e-05  1.471e-05  -0.773    0.444
## med.age          -1.533e-04  7.717e-04  -0.199    0.844
## 
## Residual standard error: 0.01028 on 37 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0541, Adjusted R-squared:  -0.04816 
## F-statistic: 0.529 on 4 and 37 DF,  p-value: 0.7151

## 
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + 
##     pop.density + med.age, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.045443 -0.014412 -0.001374  0.008952  0.135152 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       1.251e-01  5.582e-02   2.241   0.0406 *
## pop.weighted.NO2 -1.800e+05  2.899e+06  -0.062   0.9513  
## gdp.cap          -2.374e-07  4.209e-07  -0.564   0.5810  
## pop.density      -1.406e-05  2.151e-05  -0.654   0.5233  
## med.age          -3.051e-03  2.177e-03  -1.401   0.1814  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04135 on 15 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2886, Adjusted R-squared:  0.09894 
## F-statistic: 1.522 on 4 and 15 DF,  p-value: 0.2461

## 
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + 
##     pop.density + med.age, data = .)
## 
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       4.268e-02        NaN     NaN      NaN
## pop.weighted.NO2  1.367e+07        NaN     NaN      NaN
## gdp.cap           9.091e-06        NaN     NaN      NaN
## pop.density       3.624e-05        NaN     NaN      NaN
## med.age          -5.222e-03        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 4 and 0 DF,  p-value: NA

## 
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + 
##     pop.density + med.age, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.038439 -0.031820 -0.012866  0.006726  0.200574 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       5.375e-02  9.051e-02   0.594    0.564
## pop.weighted.NO2 -1.716e+06  1.152e+07  -0.149    0.884
## gdp.cap          -1.943e-06  2.886e-06  -0.673    0.514
## pop.density      -2.409e-05  5.222e-05  -0.461    0.653
## med.age           1.009e-04  3.898e-03   0.026    0.980
## 
## Residual standard error: 0.06459 on 12 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.09934,    Adjusted R-squared:  -0.2009 
## F-statistic: 0.3309 on 4 and 12 DF,  p-value: 0.8519

## 
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + 
##     pop.density + med.age, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.015968 -0.004985 -0.002020  0.006135  0.020052 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -2.611e-02  1.295e-02  -2.016   0.0556 . 
## pop.weighted.NO2  3.730e+05  2.119e+06   0.176   0.8618   
## gdp.cap          -3.885e-07  2.934e-07  -1.324   0.1985   
## pop.density       5.305e-05  5.195e-05   1.021   0.3178   
## med.age           1.257e-03  4.155e-04   3.027   0.0060 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.009454 on 23 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.389,  Adjusted R-squared:  0.2828 
## F-statistic: 3.661 on 4 and 23 DF,  p-value: 0.01895

Analysis of high and low groups.

high <- data.frame("group" = rep("high", length.out = nrow(high.covid)))
high.covid <- cbind(high, high.covid)
low <- data.frame("group" = rep("low", length.out = nrow(low.covid)))
low.covid <- cbind(low, low.covid)
df.covid <- rbind(high.covid, low.covid)

#Boxplot of NO2 Concentrations by case groups. 
ggplot(df.covid, aes(group,NO2_Concentration)) +
  geom_boxplot() +
  geom_jitter(color="red", size=0.4, alpha=0.9) +
  labs(title = "NO2 Concentrations Grouped by COVID Case Levels", x = "Case Count Group", y = "NO2 Concentration (mol/m^2)") +
  stat_compare_means(method = "t.test")

#Boxplot of weighted NO2 Concentrations by case groups. 
ggplot(df.covid, aes(group,pop.weighted.NO2)) +
  geom_boxplot() +
  geom_jitter(color="red", size=0.4, alpha=0.9) +
  labs(title = "Weighted NO2 Concentrations Grouped by COVID Case Levels", x = "Case Count Group", y = "Weighted NO2 Concentration") +
  stat_compare_means(method = "t.test")

rm(df.covid)

high <- data.frame("group" = rep("high", length.out = nrow(high.NO2)))
high.NO2 <- cbind(high, high.NO2)
low <- data.frame("group" = rep("low", length.out = nrow(low.NO2)))
low.NO2<- cbind(low, low.NO2)
df.NO2 <- rbind(high.NO2, low.NO2)

#Boxplot of COVID cases by NO2 groups. 
ggplot(df.NO2, aes(group,totcases.mil)) +
  geom_boxplot() +
  geom_jitter(color="red", size=0.4, alpha=0.9) +
  labs(title = "COVID Cases per Million Grouped by NO2 Concentration Levels", x = "NO2 Concentration Group", y = "Cases per Million") +
  stat_compare_means(method = "t.test")

#Boxplot of COVID deaths by NO2 groups. 
ggplot(df.NO2, aes(group,totdeaths.mil)) +
  geom_boxplot() +
  geom_jitter(color="red", size=0.4, alpha=0.9) +
  labs(title = "COVID Deaths per Million Grouped by NO2 Concentration Levels", x = "NO2 Concentration Group", y = "Deaths per Million") +
  stat_compare_means(method = "t.test")

#Boxplot of COVID CFR by NO2 groups. 
ggplot(df.NO2, aes(group,case.fatality.rate)) +
  geom_boxplot() +
  geom_jitter(color="red", size=0.4, alpha=0.9) +
  labs(title = "COVID Case Fatality Rate Grouped by NO2 Concentration Levels", x = "NO2 Concentration Group", y = "Case Fatality Rate") +
  stat_compare_means(method = "t.test")

rm(df.NO2)

Multivariate graphs of COVID outcomes vs weighted NO2 concentrations: NO2 group divisions.

#High NO2.
#Weighted NO2 concentration vs COVID cases. 
ggplot(data = high.NO2, aes(x = pop.weighted.NO2, y = totcases.mil)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
  labs(title = "High NO2: World COVID-19 Cases per Million vs. Population Weighted NO2 Concentration") 

cor(high.NO2$totcases.mil, high.NO2$pop.weighted.NO2, use = "complete.obs") 
## [1] 0.2947513
NO2.lm.cases <- lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = high.NO2)
summary.lm(NO2.lm.cases)
## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = high.NO2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -76771 -13402   -898  10680  96781 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -4.693e+04  1.137e+04  -4.130 7.70e-05 ***
## pop.weighted.NO2  2.568e+12  1.934e+12   1.328    0.187    
## gdp.cap           7.325e-02  2.134e-01   0.343    0.732    
## pop.density      -2.557e+01  1.886e+01  -1.356    0.178    
## med.age           2.557e+03  3.906e+02   6.548 2.79e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29550 on 97 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.4757, Adjusted R-squared:  0.4541 
## F-statistic:    22 on 4 and 97 DF,  p-value: 6.055e-13
confint(NO2.lm.cases) #Summary statistics for linear fit. 
##                          2.5 %        97.5 %
## (Intercept)      -6.948951e+04 -2.437597e+04
## pop.weighted.NO2 -1.269957e+12  6.406016e+12
## gdp.cap          -3.502009e-01  4.966916e-01
## pop.density      -6.300107e+01  1.186173e+01
## med.age           1.782191e+03  3.332507e+03
#Weighted NO2 concentration vs COVID deaths. 
ggplot(data = high.NO2, aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
  labs(title = "High NO2: World COVID-19 Deaths per Million vs. Population Weighted NO2 Concentration") 

cor(high.NO2$totdeaths.mil, high.NO2$pop.weighted.NO2, use = "complete.obs") 
## [1] 0.1381325
NO2.lm.deaths <- lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age, data = high.NO2,)
summary.lm(NO2.lm.deaths)
## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = high.NO2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1527.50  -368.98    -2.76   297.70  1542.15 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -1.233e+03  2.458e+02  -5.016 2.38e-06 ***
## pop.weighted.NO2  1.453e+10  4.183e+10   0.347   0.7291    
## gdp.cap          -1.025e-02  4.615e-03  -2.220   0.0287 *  
## pop.density      -6.787e-01  4.079e-01  -1.664   0.0994 .  
## med.age           7.137e+01  8.448e+00   8.448 2.97e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 639.1 on 97 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.4783, Adjusted R-squared:  0.4568 
## F-statistic: 22.23 on 4 and 97 DF,  p-value: 4.771e-13
confint(NO2.lm.deaths) #Summary statistics for linear fit.
##                          2.5 %        97.5 %
## (Intercept)      -1.721059e+03 -7.452248e+02
## pop.weighted.NO2 -6.848890e+10  9.754728e+10
## gdp.cap          -1.940599e-02 -1.087171e-03
## pop.density      -1.488362e+00  1.309679e-01
## med.age           5.460066e+01  8.813500e+01
#Weighted NO2 concentration vs COVID CFR. 
ggplot(data = high.NO2, aes(x = pop.weighted.NO2, y = case.fatality.rate)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Case Fatality Rate") +
  labs(title = "High NO2: COVID-19 Case Fatality Rate vs. Population Weighted NO2 Concentration") 

cor(high.NO2$case.fatality.rate, high.NO2$pop.weighted.NO2, use = "complete.obs") 
## [1] -0.03359483
NO2.lm.deaths <- lm(case.fatality.rate ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age, data = high.NO2,)
summary.lm(NO2.lm.deaths)
## 
## Call:
## lm(formula = case.fatality.rate ~ pop.weighted.NO2 + gdp.cap + 
##     pop.density + med.age, data = high.NO2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.021921 -0.007799 -0.003010  0.005463  0.072667 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       1.559e-02  5.383e-03   2.897  0.00466 **
## pop.weighted.NO2  5.512e+05  9.159e+05   0.602  0.54866   
## gdp.cap          -2.038e-07  1.010e-07  -2.017  0.04651 * 
## pop.density      -1.236e-05  8.932e-06  -1.383  0.16971   
## med.age           2.710e-04  1.850e-04   1.465  0.14612   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01399 on 97 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.05531,    Adjusted R-squared:  0.01635 
## F-statistic:  1.42 on 4 and 97 DF,  p-value: 0.2332
confint(NO2.lm.deaths) #Summary statistics for linear fit.
##                          2.5 %        97.5 %
## (Intercept)       4.909463e-03  2.627647e-02
## pop.weighted.NO2 -1.266530e+06  2.369022e+06
## gdp.cap          -4.043276e-07 -3.216153e-09
## pop.density      -3.008592e-05  5.371159e-06
## med.age          -9.611847e-05  6.381541e-04
#Function for multivariate analysis by Super.region. 
cases.NO2 <- function(x) { 
                    high.NO2 %>% 
                    filter(Super.region == x) %>%
                    ggplot(aes(x = pop.weighted.NO2, y = totcases.mil)) +
                        geom_point(color = "#333aff") +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
                        labs(title = paste0("COVID-19 Cases per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
} 
NO2.lm.cases.reg <- function(x) {
                        high.NO2 %>%
                        filter(Super.region == x) %>%
                        lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age,.) %>%
                        summary.lm()  
} #Function for linear regression statistics to be included in for loop below. 

for (i in regions.list) {
   print(cases.NO2(i)) 
   print(NO2.lm.cases.reg(i)) 
} #Loops function for continents of interest. 

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -65602 -18514   6059  25655  47547 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       1.654e+05  7.329e+04   2.257   0.0367 *
## pop.weighted.NO2  1.919e+12  6.745e+12   0.285   0.7793  
## gdp.cap          -4.519e-01  6.746e-01  -0.670   0.5115  
## pop.density      -2.695e+00  6.568e+01  -0.041   0.9677  
## med.age          -2.148e+03  1.820e+03  -1.180   0.2534  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35880 on 18 degrees of freedom
## Multiple R-squared:  0.1035, Adjusted R-squared:  -0.09568 
## F-statistic: 0.5197 on 4 and 18 DF,  p-value: 0.7224

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##      1      2      3      4      5      6      7 
##   8777  -1333  -2963   3990 -41812  22578  10764 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -8.756e+04  1.701e+05  -0.515    0.658
## pop.weighted.NO2  3.978e+13  5.529e+13   0.719    0.547
## gdp.cap           2.374e-01  3.175e+00   0.075    0.947
## pop.density      -2.936e+02  1.937e+02  -1.516    0.269
## med.age           3.341e+03  5.419e+03   0.617    0.600
## 
## Residual standard error: 35200 on 2 degrees of freedom
## Multiple R-squared:  0.6837, Adjusted R-squared:  0.05123 
## F-statistic: 1.081 on 4 and 2 DF,  p-value: 0.5325

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5341.0 -1442.3   -16.3  1741.1  4790.3 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -2.872e+04  7.407e+03  -3.878  0.00133 **
## pop.weighted.NO2  1.436e+12  1.349e+12   1.064  0.30311   
## gdp.cap           1.088e+00  3.367e-01   3.232  0.00521 **
## pop.density      -5.255e+00  6.990e+00  -0.752  0.46307   
## med.age           1.455e+03  4.584e+02   3.173  0.00590 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3055 on 16 degrees of freedom
## Multiple R-squared:  0.9084, Adjusted R-squared:  0.8855 
## F-statistic: 39.68 on 4 and 16 DF,  p-value: 4.086e-08

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25681.6 -14474.6    134.7  14315.8  19752.8 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -5.628e+03  4.620e+04  -0.122    0.906
## pop.weighted.NO2 -5.335e+12  6.591e+12  -0.809    0.442
## gdp.cap           6.226e-01  4.211e-01   1.478    0.178
## pop.density       2.113e+02  1.430e+02   1.478    0.178
## med.age           7.712e+02  1.601e+03   0.482    0.643
## 
## Residual standard error: 18510 on 8 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6966, Adjusted R-squared:  0.545 
## F-statistic: 4.593 on 4 and 8 DF,  p-value: 0.03207

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -1.913e+05        NaN     NaN      NaN
## pop.weighted.NO2  2.886e+13        NaN     NaN      NaN
## gdp.cap           1.277e+01        NaN     NaN      NaN
## pop.density       3.630e+01        NaN     NaN      NaN
## med.age           8.563e+02        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 4 and 0 DF,  p-value: NA

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##        1        3        4        5        6        7        8 
##  -208.35  2990.80 -2634.82   -50.89   919.46   969.02 -1985.22 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       1.754e+04  7.185e+03   2.442   0.1347  
## pop.weighted.NO2 -7.075e+11  1.079e+12  -0.656   0.5793  
## gdp.cap           1.171e+00  1.936e-01   6.046   0.0263 *
## pop.density       2.474e+01  1.226e+01   2.018   0.1811  
## med.age          -8.820e+02  3.031e+02  -2.910   0.1006  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3291 on 2 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.9516, Adjusted R-squared:  0.8547 
## F-statistic: 9.825 on 4 and 2 DF,  p-value: 0.09451

## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -36849 -21174  -4421  16219  92706 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -9.879e+04  4.657e+04  -2.121   0.0460 *
## pop.weighted.NO2  1.712e+12  7.678e+12   0.223   0.8257  
## gdp.cap           5.295e-01  9.843e-01   0.538   0.5963  
## pop.density       1.702e+02  1.817e+02   0.936   0.3597  
## med.age           3.647e+03  1.409e+03   2.588   0.0172 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31230 on 21 degrees of freedom
## Multiple R-squared:  0.4877, Adjusted R-squared:  0.3902 
## F-statistic: 4.999 on 4 and 21 DF,  p-value: 0.005452
deaths.NO2 <- function(x) { 
                    high.NO2 %>% 
                    filter(Super.region == x) %>%
                    ggplot(aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
                        geom_point(color = "#333aff") +
                        geom_smooth(color = "red") +
                        geom_smooth(method = "lm", color = "purple") +
                        theme_bw() +
                        labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
                        labs(title = paste0("COVID-19 Deaths per Million People in ", (Super.region = x), " vs. Population Weighted NO2 Concentration"))
} 
NO2.lm.deaths.reg <- function(x) {
                        high.NO2 %>%
                        filter(Super.region == x) %>%
                        lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age,.) %>%
                        summary.lm() #Function for linear regression statistics to be included in for loop below. 
}
for (i in regions.list) {
   print(deaths.NO2(i)) 
   print(NO2.lm.deaths.reg(i)) 
} #Loops function for continents of interest.

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1147.6  -548.2   172.5   397.8  1104.4 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       1.953e+03  1.528e+03   1.278    0.218
## pop.weighted.NO2 -1.882e+10  1.407e+11  -0.134    0.895
## gdp.cap          -2.058e-02  1.407e-02  -1.463    0.161
## pop.density      -4.121e-01  1.370e+00  -0.301    0.767
## med.age           4.738e+00  3.796e+01   0.125    0.902
## 
## Residual standard error: 748.2 on 18 degrees of freedom
## Multiple R-squared:  0.1215, Adjusted R-squared:  -0.07369 
## F-statistic: 0.6225 on 4 and 18 DF,  p-value: 0.6523

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##       1       2       3       4       5       6       7 
## -151.18   26.53   45.48   27.39  191.80  -85.56  -54.45 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -2.247e+03  9.264e+02  -2.426   0.1361  
## pop.weighted.NO2  6.239e+11  3.011e+11   2.072   0.1740  
## gdp.cap          -4.191e-02  1.729e-02  -2.424   0.1362  
## pop.density      -7.541e+00  1.055e+00  -7.149   0.0190 *
## med.age           1.381e+02  2.950e+01   4.681   0.0427 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 191.6 on 2 degrees of freedom
## Multiple R-squared:  0.9843, Adjusted R-squared:  0.9529 
## F-statistic: 31.35 on 4 and 2 DF,  p-value: 0.03115

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -153.54  -67.88  -45.37   84.55  229.88 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -9.691e+02  2.614e+02  -3.707  0.00192 **
## pop.weighted.NO2  1.182e+11  4.763e+10   2.483  0.02451 * 
## gdp.cap           1.448e-02  1.188e-02   1.219  0.24056   
## pop.density      -7.198e-02  2.467e-01  -0.292  0.77423   
## med.age           4.214e+01  1.618e+01   2.605  0.01916 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 107.8 on 16 degrees of freedom
## Multiple R-squared:  0.8294, Adjusted R-squared:  0.7868 
## F-statistic: 19.45 on 4 and 16 DF,  p-value: 5.47e-06

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -483.37 -148.69    2.96  168.01  431.23 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -1.381e+03  8.029e+02  -1.720   0.1238  
## pop.weighted.NO2  9.745e+10  1.145e+11   0.851   0.4196  
## gdp.cap          -1.179e-02  7.317e-03  -1.612   0.1457  
## pop.density      -1.071e+00  2.484e+00  -0.431   0.6777  
## med.age           6.608e+01  2.782e+01   2.375   0.0449 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 321.7 on 8 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5773, Adjusted R-squared:  0.366 
## F-statistic: 2.732 on 4 and 8 DF,  p-value: 0.1056

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
## ALL 5 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -2.317e+03        NaN     NaN      NaN
## pop.weighted.NO2  4.078e+11        NaN     NaN      NaN
## gdp.cap           1.846e-01        NaN     NaN      NaN
## pop.density       5.531e-01        NaN     NaN      NaN
## med.age          -3.957e+00        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 4 and 0 DF,  p-value: NA

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##       1       3       4       5       6       7       8 
##   2.972  21.518 -18.880 -18.419  44.625  34.423 -66.239 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)       2.044e+02  1.443e+02   1.416    0.292
## pop.weighted.NO2 -1.320e+09  2.166e+10  -0.061    0.957
## gdp.cap           8.808e-03  3.887e-03   2.266    0.152
## pop.density       4.856e-01  2.462e-01   1.973    0.187
## med.age          -1.038e+01  6.085e+00  -1.707    0.230
## 
## Residual standard error: 66.07 on 2 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.8163, Adjusted R-squared:  0.4488 
## F-statistic: 2.221 on 4 and 2 DF,  p-value: 0.3337

## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1130.4  -334.2  -105.7   371.4  1277.9 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -3.218e+03  9.651e+02  -3.335  0.00314 **
## pop.weighted.NO2 -3.946e+10  1.591e+11  -0.248  0.80653   
## gdp.cap          -3.597e-03  2.040e-02  -0.176  0.86170   
## pop.density       8.621e+00  3.766e+00   2.289  0.03253 * 
## med.age           1.103e+02  2.920e+01   3.776  0.00111 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 647.1 on 21 degrees of freedom
## Multiple R-squared:  0.6097, Adjusted R-squared:  0.5353 
## F-statistic: 8.201 on 4 and 21 DF,  p-value: 0.0003795
#Low NO2.
#Weighted NO2 concentration vs COVID cases. 
ggplot(data = low.NO2, aes(x = pop.weighted.NO2, y = totcases.mil)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Cases per Million People") +
  labs(title = "Low NO2: World COVID-19 Cases per Million vs. Population Weighted NO2 Concentration") 

cor(low.NO2$totcases.mil, low.NO2$pop.weighted.NO2, use = "complete.obs") 
## [1] -0.2395034
NO2.lm.cases <- lm(totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + med.age, data = low.NO2)
summary.lm(NO2.lm.cases)
## 
## Call:
## lm(formula = totcases.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = low.NO2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -60468 -20076  -4999   9283 112386 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -1.314e+04  2.048e+04  -0.641   0.5234  
## pop.weighted.NO2 -1.057e+13  8.047e+12  -1.313   0.1934  
## gdp.cap           5.251e-01  2.860e-01   1.836   0.0706 .
## pop.density      -8.421e+00  3.803e+00  -2.214   0.0301 *
## med.age           1.554e+03  7.553e+02   2.057   0.0435 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33570 on 69 degrees of freedom
##   (30 observations deleted due to missingness)
## Multiple R-squared:  0.2558, Adjusted R-squared:  0.2126 
## F-statistic: 5.928 on 4 and 69 DF,  p-value: 0.0003683
confint(NO2.lm.cases) #Summary statistics for linear fit. 
##                          2.5 %        97.5 %
## (Intercept)      -5.399559e+04  2.772475e+04
## pop.weighted.NO2 -2.662089e+13  5.484073e+12
## gdp.cap          -4.540096e-02  1.095648e+00
## pop.density      -1.600856e+01 -8.339348e-01
## med.age           4.683803e+01  3.060399e+03
#Weighted NO2 concentration vs COVID deaths. 
ggplot(data = low.NO2, aes(x = pop.weighted.NO2, y = totdeaths.mil)) +
  geom_point(color = "#333aff", alpha = 1) +
  geom_smooth(color = "red") +
  geom_smooth(method = "lm", color = "purple") +
  theme_bw() +
  labs(x = "NO2 Concentration (mol/m^2)", y = "Total COVID-19 Deaths per Million People") +
  labs(title = "Low NO2: World COVID-19 Deaths per Million vs. Population Weighted NO2 Concentration") 

cor(low.NO2$totdeaths.mil, low.NO2$pop.weighted.NO2, use = "complete.obs") 
## [1] -0.04312052
NO2.lm.deaths <- lm(totdeaths.mil ~ pop.weighted.NO2 + gdp.cap  + pop.density + med.age, data = low.NO2,)
summary.lm(NO2.lm.deaths)
## 
## Call:
## lm(formula = totdeaths.mil ~ pop.weighted.NO2 + gdp.cap + pop.density + 
##     med.age, data = low.NO2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -890.4 -294.8 -191.3  148.2 5224.5 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -6.747e+02  5.134e+02  -1.314   0.1935  
## pop.weighted.NO2  1.527e+11  2.046e+11   0.746   0.4582  
## gdp.cap          -4.328e-03  6.753e-03  -0.641   0.5238  
## pop.density      -1.031e-01  8.997e-02  -1.146   0.2560  
## med.age           4.109e+01  1.817e+01   2.262   0.0271 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 792 on 65 degrees of freedom
##   (34 observations deleted due to missingness)
## Multiple R-squared:  0.09308,    Adjusted R-squared:  0.03726 
## F-statistic: 1.668 on 4 and 65 DF,  p-value: 0.1682
confint(NO2.lm.deaths) #Summary statistics for linear fit.
##                          2.5 %       97.5 %
## (Intercept)      -1.700069e+03 3.507511e+02
## pop.weighted.NO2 -2.559880e+11 5.614107e+11
## gdp.cap          -1.781470e-02 9.158708e-03
## pop.density      -2.827869e-01 7.657921e-02
## med.age           4.807252e+00 7.736744e+01

Correlation analysis: satellite vs. ground EPA.

Formatting sattelite and ground data (US only).

#Getting satellite data specifically for this time frame. 
July_Oct2018.sat.r <- raster('US_ground_NO2/US_NO2_2018-07-01_start.tif') %>%
  flip(direction='y')
July_Oct2018.sat.r <- crop(July_Oct2018.sat.r, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
July_Oct2018.sat.r <- mask(July_Oct2018.sat.r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])

#Direct conversion from NO2 raster to data frame while filtering by shapefile. 
continental.US.fxn <- function(x) {
  r1 <- raster(x) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  r1 <- mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r1 <- rasterToPoints(r1, spatial = TRUE) 
  July_Oct2018.sat <- r1 %>% 
      data.frame() %>% 
      select(Latitude = 3, Longitude = 2, NO2_Concentration = 1)
  assign("July_Oct2018.sat", July_Oct2018.sat, .GlobalEnv)
}  #Final satellite data to use for comparison to EPA data. 
continental.US.fxn("US_ground_NO2/US_NO2_2018-07-01_start.tif")

#Selecting just latitude and longitude from data frame just made to use with Pargasite retrieval. 
US.lat.lon <- July_Oct2018.sat %>% select(c(Latitude, Longitude))
# write.csv(US.lat.lon, file.path("US_ground_NO2", "US_lat_lon.csv"), row.names = FALSE) #Used to gather EPA data from Pargasite. 

#Reading in EPA data
#Pargasite: only overlapping time frame is July 2018 - October 2018 (Pargasite stops working in November 2018)
July_Oct2018.ground <- read.csv(file = './US_ground_NO2/July_Oct2018NO2.csv') %>% select(c("Latitude", "Longitude", "no2_estimate")) %>% rename(NO2_Concentration = no2_estimate)
July_Oct2018.ground <- data.frame(July_Oct2018.ground) #Final EPA data to use in comparison to satellite data. 

#EPA data frame changed to raster format.
July_Oct2018.ground.r <- July_Oct2018.ground %>% select(c("Longitude", "Latitude", "NO2_Concentration")) %>% rasterFromXYZ() 

Checking for satellite data correlation to ground data (US only).

#Resampling raster data to be same size for comparisons. 
July_Oct2018.sat.r.re <- July_Oct2018.sat.r %>% resample(July_Oct2018.ground.r)

#Perform correlation test between satellite and ground rasters. 
cor(values(July_Oct2018.sat.r.re),
    values(July_Oct2018.ground.r),
    use = "na.or.complete")
## [1] 0.03616317
#Linear model estimation. 
lm1 <- lm(values(July_Oct2018.sat.r.re) ~ values(July_Oct2018.ground.r))
summary(lm1)
## 
## Call:
## lm(formula = values(July_Oct2018.sat.r.re) ~ values(July_Oct2018.ground.r))
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.184e-05 -5.636e-06 -7.730e-07  3.633e-06  4.478e-05 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   2.152e-05  8.089e-07  26.603   <2e-16 ***
## values(July_Oct2018.ground.r) 1.211e-07  1.173e-07   1.032    0.302    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.48e-06 on 813 degrees of freedom
##   (417 observations deleted due to missingness)
## Multiple R-squared:  0.001308,   Adjusted R-squared:  7.937e-05 
## F-statistic: 1.065 on 1 and 813 DF,  p-value: 0.3025
#Local correlation estimates--shows where locally on map specific regions are positively or negatively correlated. (Getting an error: try to replicate code from this website https://statnmap.com/2018-01-27-spatial-correlation-between-rasters/)

Plotting satellite vs ground data (US only).

ggplot() +
 geom_raster(data = July_Oct2018.sat, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) + 
 geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
  my_theme() + 
  ggtitle("NO2 US Satellite Data, July-Oct 2018") +
  #scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
  scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100)) +
    north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
    scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
  xlim(-130,-65) +
  ylim(23,49) +
  coord_quickmap()

ggplot() +
 geom_raster(data = July_Oct2018.ground, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) + 
 geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
  my_theme() + 
  ggtitle("NO2 US EPA Ground Data, July-Oct 2018") +
  #scale_fill_manual(breaks = c(0, 0.0000025, 0.000005, 0.0000075, 0.00001, 0.00002)) +
  scale_fill_gradientn(name = "NO2 Concentration \n(ppb)", colours = myPalette(100)) +
    north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
    scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
  xlim(-130,-65) +
  ylim(23,49) +
  coord_quickmap()

Formatting sattelite and ground data to be used for correlation analysis (China only).

#No success in retrieving data so far due to lack of shapefiles. 

Reading in shapefiles for six cities analysis.

#Unsuccessful without ArcGIS access.
london.shp <- st_read("six_cities_shapefiles/london/ESRI/London_Borough_Excluding_MHW.shp")  # Read shapefile as an sf object
## Reading layer `London_Borough_Excluding_MHW' from data source `/Users/alanaschreibman/COVID-19_NO2/six_cities_shapefiles/london/ESRI/London_Borough_Excluding_MHW.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## Projected CRS: OSGB 1936 / British National Grid
class(london.shp)
## [1] "sf"         "data.frame"
london.shp <- st_geometry(london.shp)
plot(london.shp)

seattle.shp <- st_read("six_cities_shapefiles/seattle/WA.shp")  # Read shapefile as an sf object
## Reading layer `WA' from data source `/Users/alanaschreibman/COVID-19_NO2/six_cities_shapefiles/seattle/WA.shp' using driver `ESRI Shapefile'
## Simple feature collection with 39 features and 48 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -124.7314 ymin: 45.54325 xmax: -116.9182 ymax: 49
## CRS:           NA
class(seattle.shp)
## [1] "sf"         "data.frame"
seattle.shp <- st_geometry(seattle.shp)
plot(seattle.shp)

World Air Quality Data City Analysis: data preparation.

#Unsuccessful: cannot find shapefiles for the selected cities without ArcGIS access.
#Read in world AQI data for 6 cities. Dates range 12/30/19 to 4/5/20. 
waqi.main <- read.csv(file = 'waqi_NO2.csv', header = TRUE) 
seattle.df <- filter(waqi.main, City == "Seattle") 
  seattle.df$Date <- as.Date(seattle.df$Date, format = "%m/%d/%y")
  seattle.df <- arrange(seattle.df, Date)
wuhan.df <- filter(waqi.main, City == "Wuhan")
  wuhan.df$Date <- as.Date(wuhan.df$Date, format = "%m/%d/%y")
  wuhan.df <- arrange(wuhan.df, Date)
milan.df <- filter(waqi.main, City == "Milan")
  milan.df$Date <- as.Date(milan.df$Date, format = "%m/%d/%y")
  milan.df <- arrange(milan.df, Date)
london.df <- filter(waqi.main, City == "London")
  london.df$Date <- as.Date(london.df$Date, format = "%m/%d/%y")
  london.df <- arrange(london.df, Date)
saopaulo.df <- filter(waqi.main, City == "Sao Paulo") 
  saopaulo.df$Date <- as.Date(saopaulo.df$Date, format = "%m/%d/%y")
  saopaulo.df <- arrange(saopaulo.df, Date)
johannesburg.df <- filter(waqi.main, City == "Johannesburg") 
  johannesburg.df$Date <- as.Date(johannesburg.df$Date, format = "%m/%d/%y")
  johannesburg.df <- arrange(johannesburg.df, Date)

dates.2020.Q1 <- seq(as.Date("2019-12-30"), as.Date("2020-04-05"), by=7) #List of dates starting Dec 30th 2019. 
#Averaging NO2 data by the week. 
#Seattle
seattle.ground.fxn <- function(x) {
  date.filter <- subset(seattle.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
  mean <- mean(date.filter$median)
  seattle.avg.df <- data.frame(mean)
  names(seattle.avg.df)[1] <- "Mean_NO2_Concentration"
  Week_start_date <- as.Date(dates.2020.Q1[[x]])
  Type <- rep("satellite", length.out = nrow(seattle.avg.df)) 
  seattle.ground.weekly <- cbind(Week_start_date, Type, seattle.avg.df) 
  seattle.ground.weekly <- seattle.ground.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}
x <- seq_along(dates.2020.Q1)
seattle.ground.weekly.df <- purrr::map_dfr(x, seattle.ground.fxn) 

#Wuhan
wuhan.ground.NO2.fxn <- function(x) {
  date.filter <- subset(wuhan.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
  mean <- mean(date.filter$median)
  date.start <- as.Date(dates.2020.Q1[[x]])
  wuhan.avg.df <- data.frame(date.start, wuhan.df[1,3], mean)
  names(wuhan.avg.df)[1] <- "Week_start_date"
  names(wuhan.avg.df)[2] <- "City"
  names(wuhan.avg.df)[3] <- "Mean_of_Median_NO2"
  output <- wuhan.avg.df 
  return(output)
}
wuhan.ground.df <- purrr::map_dfr(x, wuhan.ground.NO2.fxn) 

#Milan
milan.ground.NO2.fxn <- function(x) {
  date.filter <- subset(milan.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
  mean <- mean(date.filter$median)
  date.start <- as.Date(dates.2020.Q1[[x]])
  milan.avg.df <- data.frame(date.start, milan.df[1,3], mean)
  names(milan.avg.df)[1] <- "Week_start_date"
  names(milan.avg.df)[2] <- "City"
  names(milan.avg.df)[3] <- "Mean_of_Median_NO2"
  output <- milan.avg.df 
  return(output)
}
milan.ground.df <- purrr::map_dfr(x, milan.ground.NO2.fxn) 

#London
london.ground.fxn <- function(x) {
  date.filter <- subset(london.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
  mean <- mean(date.filter$median)
  london.avg.df <- data.frame(mean)
  names(london.avg.df)[1] <- "Mean_NO2_Concentration"
  Week_start_date <- as.Date(dates.2020.Q1[[x]])
  Type <- rep("satellite", length.out = nrow(london.avg.df)) 
  london.ground.weekly <- cbind(Week_start_date, Type, london.avg.df) 
  london.ground.weekly <- london.ground.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}
x <- seq_along(dates.2020.Q1)
london.ground.weekly <- purrr::map_dfr(x, london.ground.fxn)

#Sao Paulo
saopaulo.ground.NO2.fxn <- function(x) {
  date.filter <- subset(saopaulo.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
  mean <- mean(date.filter$median)
  date.start <- as.Date(dates.2020.Q1[[x]])
  saopaulo.avg.df <- data.frame(date.start, saopaulo.df[1,3], mean)
  names(saopaulo.avg.df)[1] <- "Week_start_date"
  names(saopaulo.avg.df)[2] <- "City"
  names(saopaulo.avg.df)[3] <- "Mean_of_Median_NO2"
  output <- saopaulo.avg.df 
  return(output)
}
saopaulo.ground.df <- purrr::map_dfr(x, saopaulo.ground.NO2.fxn) 

#Jonannesburg
jonannesburg.ground.NO2.fxn <- function(x) {
  date.filter <- subset(johannesburg.df, Date >= dates.2020.Q1[[x]] & Date < dates.2020.Q1[[x]] + 6)
  mean <- mean(date.filter$median)
  date.start <- as.Date(dates.2020.Q1[[x]])
  johannesburg.avg.df <- data.frame(date.start, johannesburg.df[1,3], mean)
  names(johannesburg.avg.df)[1] <- "Week_start_date"
  names(johannesburg.avg.df)[2] <- "City"
  names(johannesburg.avg.df)[3] <- "Mean_of_Median_NO2"
  output <- johannesburg.avg.df 
  return(output)
}
jonannesburg.ground.df <- purrr::map_dfr(x, jonannesburg.ground.NO2.fxn) 

#Retrieving satellite data:
#Used code:
#gsutil -m cp \
# >   "file names" \
# >   COVID-19_NO2/six_cities_ground_NO2

# in terminal to copy all files from google cloud storage into folders in working directory.

#Plan:
#get shapefiles for each city
#use shapefiles to filter satellite data for each city and make data frame equivalent to ground data per week
#make graphs for each comparing the two data sets

#Creates list of raster files from folder to call in function. 
seattle.rastlist <- list.files(path = "six_cities_ground_NO2/seattle", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Function to make data frame for weekly average NO2 concentration. 
seattle.sat.fxn <- function(x, y) {
  r <- raster(paste0("six_cities_ground_NO2/seattle/", x))  %>% flip(direction='y')  
  r.vals <- raster::extract(r, `seattle.shp`)
  r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
  final.df <- data.frame(`seattle.shp`@data, r.mean)  
  Week_start_date <- as.Date(dates.2020.Q1[[y]])
  Type <- rep(satellite, length.out = nrow(final.df)) 
  seattle.sat.weekly <- cbind(final.df, Date, Week, Type) 
  seattle.sat.weekly <- seattle.sat.weekly %>% dplyr::mutate(Date = as.factor(strftime(as.Date(Date, format="%Y-%m-%d"), format="%m-%d")))
  names(seattle.sat.weekly)[11] <- "Mean_NO2_Concentration"
  seattle.sat.weekly <- seattle.sat.weekly %>% select(Type, Date, Mean_NO2_Concentration)
}

x <- seattle.rastlist
y <- 1:length(dates.2020.Q1)
seattle.sat.weekly.df <- purrr::map2_dfr(x, y, seattle.sat.fxn) 
seattle.weekly.df <- rbind(seattle.sat.weekly.df, seattle.ground.weekly.df) 


london.rastlist <- list.files(path = "six_cities_ground_NO2/london", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

london.sat.fxn <- function(x, y) {
  r <- raster(paste0("six_cities_ground_NO2/london/", x))  %>% flip(direction='y')  
  r.vals <- raster::extract(r, `london.shp`)
  r.mean <- lapply(r.vals, FUN=mean, na.rm = TRUE)
  final.df <- data.frame(r.mean)  
  Week_start_date <- as.Date(dates.2020.Q1[[y]])
  Type <- rep("satellite", length.out = nrow(final.df)) 
  london.sat.weekly <- cbind(final.df, Week_start_date, Type) 
  names(london.sat.weekly)[1] <- "Mean_NO2_Concentration"
  london.sat.weekly <- london.sat.weekly %>% dplyr::select(Type, Week_start_date, Mean_NO2_Concentration)
}

x <- london.rastlist
y <- 1:length(dates.2020.Q1)
london.sat.weekly <- purrr::map2_dfr(x, y, london.sat.fxn) 
london.weekly.df <- rbind(london.sat.weekly,london.ground.weekly.df) 

Time analysis: NO2 and COVID changes during the COVID-19 pandemic.

COVID time analysis: interrupted series time analysis plots.

time.series.theme <- function() {
  theme(legend.key.size = unit(1, 'cm'),
        plot.title = element_text(size=22),
        legend.position="right",
        legend.title = element_text(size=14),
        legend.text = element_text(size=10),
        axis.title=element_text(size = 10),
        axis.text.y=element_text(size=10),
        axis.text.x=element_text(size=10,angle=90, hjust=1)) 
}

#Cases
ggplot(mean.newcases.df, aes(x = Date, y = Mean_Weekly_Cases)) +
  geom_line(size = 0.75) + 
  theme_bw() +
  time.series.theme() +
  ggtitle("World Weekly Averaged COVID Cases") +
  ylab("Number of Cases") +
  scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2020-01-20",format="%Y-%m-%d"),as.Date("2021-07-04",format="%Y-%m-%d"))) +
ylim(0, 840000) +
  
  #Pre-lockdown
  geom_rect(data=mean.newcases.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=mean.newcases.df,aes(x=as.Date("2020-01-08"),y=8.35E5),label="Pre-Lockdown",color = "black", size = 3) +
  geom_vline(data=mean.newcases.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=mean.newcases.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=mean.newcases.df,aes(x=as.Date("2020-04-01"),y=8.35E5),label="Lockdown 1", color = "black", size = 3) +
  geom_vline(data=mean.newcases.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=mean.newcases.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2021-07-04",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=mean.newcases.df,aes(x=as.Date("2020-09-15"),y=8.35E5),label="Re-Opening", color = "black", size = 3) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black")) +
  ggsave("covid_cases_timeseries.pdf")

#Deaths
ggplot(mean.newdeaths.df , aes(x = Date, y = Mean_Weekly_Deaths)) +
  geom_line(size = 0.75) + 
  theme_bw() +
  time.series.theme() +
  ggtitle("World Weekly Averaged COVID Deaths") +
  ylab("Number of Deaths") +
  scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2020-01-20",format="%Y-%m-%d"),as.Date("2021-07-04",format="%Y-%m-%d"))) +
ylim(0, 1.5E4) +
  
  #Pre-lockdown
  geom_rect(data=mean.newdeaths.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=mean.newdeaths.df,aes(x=as.Date("2020-01-08"),y=1.475E4),label="Pre-Lockdown",color = "black", size = 3) +
  geom_vline(data=mean.newdeaths.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=mean.newdeaths.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=mean.newdeaths.df,aes(x=as.Date("2020-04-01"),y=1.475E4),label="Lockdown 1", color = "black", size = 3) +
  geom_vline(data=mean.newdeaths.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=mean.newdeaths.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2021-07-04",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=mean.newdeaths.df,aes(x=as.Date("2020-09-15"),y=1.475E4),label="Re-Opening", color = "black", size = 3) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black")) +
  ggsave("covid_deaths_timeseries.pdf")

Urban areas only interrupted time series analysis.

#Used code:
#gsutil -m cp \
# >   "file names" \
# >   COVID-19_NO2/time_analysis_2020

# in terminal to copy all files from google cloud storage into folders in working directory. 

#2019 data. 
#Creates list of raster files from folder to call in function. 
rastlist.2019.left <- list.files(path = "time_analysis_2019_left", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)
rastlist.2019.right <- list.files(path = "time_analysis_2019_right", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Creates list of dates to call in function. 
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018. 
        
#Function to make data frame for weekly average NO2 concentration. 
raster.list.fxn <- function(x1, x2) {
  left <- raster(paste0("time_analysis_2019_left/", x1)) %>%
    flip(direction='y') 
  right <- raster(paste0("time_analysis_2019_right/", x2)) %>%
    flip(direction='y') 
  x <- list(left, right)
  names(x)[1:2] <- c('x', 'y')
  x$fun <- mean
  x$na.rm <- TRUE
  world.NO2.r <- do.call(mosaic, x) #Combines left and right halves of world raster.
  world.NO2.r
}

x1 <- as.list(rastlist.2019.left)
x2 <- as.list(rastlist.2019.right)
urban.2019.weekly.list <- purrr::map2(x1, x2, raster.list.fxn)

raster.extract.fxn <- function(r, y) {
  r.vals <- exact_extract(r, world.sf, weights = 'area', 'mean') 
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame("NO2_Concentration" = r.mean)   
  Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
  Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6)) 
  Period <- rep(2019, length.out = nrow(final.df)) 
  urban.2019.weekly <- cbind(final.df, Period, Date, Week) 
  urban.2019.weekly <- urban.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
  
r <- urban.2019.weekly.list
y <- as.list(1:length(dates.2019))
urban.2019.weekly.df <- purrr::map2_dfr(r, y, raster.extract.fxn)

#2020 data.  
#Creates list of raster files from folder to call in function. 
rastlist.2020.left <- list.files(path = "time_analysis_2020_left", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)
rastlist.2020.right <- list.files(path = "time_analysis_2020_right", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Creates list of dates to call in function. 
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019. 

#Function to make data frame for weekly average NO2 concentration. 
raster.list.fxn <- function(x1, x2) {
  left <- raster(paste0("time_analysis_2020_left/", x1)) %>%
    flip(direction='y') 
  right <- raster(paste0("time_analysis_2020_right/", x2)) %>%
    flip(direction='y') 
  x <- list(left, right)
  names(x)[1:2] <- c('x', 'y')
  x$fun <- mean
  x$na.rm <- TRUE
  world.NO2.r <- do.call(mosaic, x) #Combines left and right halves of world raster. 
  world.NO2.r
}
  
x1 <- as.list(rastlist.2020.left)
x2 <- as.list(rastlist.2020.right)
urban.2020.weekly.list <- purrr::map2(x1, x2, raster.list.fxn)

raster.extract.fxn <- function(r, y) {
  r.vals <- exact_extract(r, world.sf, weights = 'area', 'mean') 
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame("NO2_Concentration" = r.mean)   
  Date <- as.factor(as.Date(dates.2020[[y]]))  
  Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6)) 
  Period <- rep(2020, length.out = nrow(final.df)) 
  urban.2020.weekly <- cbind(final.df, Period, Date, Week) 
  urban.2020.weekly <- urban.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}

r <- urban.2020.weekly.list
y <- as.list(1:length(dates.2020))
urban.2020.weekly.df <- purrr::map2_dfr(r, y, raster.extract.fxn)
urban.weekly.df <- rbind(urban.2019.weekly.df, urban.2020.weekly.df) 
urban.weekly.df$Date <- as.Date(urban.weekly.df$Date)
urban.weekly.df$Period <- as.factor(urban.weekly.df$Period)
write.csv(urban.weekly.df, file = 'time_series_csv/urban.weekly.NO2.csv')
rm(urban.weekly.df)

Urban areas time analysis: interrupted series time analysis plots.

urban.weekly.df <- read.csv(file = 'time_series_csv/urban.weekly.NO2.csv') 
urban.weekly.df$Date <- as.Date(urban.weekly.df$Date)
urban.weekly.df$Period <- as.factor(urban.weekly.df$Period)

urban.plot <- ggplot(urban.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
  geom_line(size = 0.75) + 
  theme_bw() + 
  time.series.theme() +
  labs(fill = "Year of Measured Pollution") +
  ggtitle("Urban Areas NO2 in 2019 vs 2020") +
  ylab("NO2 Concentration \n(mol/m^2)") +
  scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 3.3E-5) +
  
  #Pre-lockdown
  geom_rect(data=urban.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=3.25E-5,ymax=3.3E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=urban.weekly.df,aes(x=as.Date("2020-01-08"),y=3.275E-5),label="Pre-Lockdown",color = "black", size = 1.5) +
  geom_vline(data=urban.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=urban.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=3.25E-5,ymax=3.3E-5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=urban.weekly.df,aes(x=as.Date("2020-04-01"),y=3.275E-5),label="Lockdown 1", color = "black", size = 1.5) +
  geom_vline(data=urban.weekly.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=urban.weekly.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"),ymin=3.25E-5,ymax=3.3E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=urban.weekly.df,aes(x=as.Date("2020-09-15"),y=3.275E-5),label="Re-Opening", color = "black", size = 1.5) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black")) +
  ggsave("urban_time_series.pdf")

Combining COVID time series data frame with NO2.

covid.weekly.df <- mean.newcases.df %>% inner_join(mean.newdeaths.df, by = c("Date", "Week"))
total.weekly.df <- urban.weekly.df %>% left_join(covid.weekly.df, by = c("Date", "Week"))

#Cases
mean.newcases.plot <- ggplot(total.weekly.df, aes(x = Date, y = Mean_Weekly_Cases, group = Period, color = Period)) +
  geom_line(size = 0.75) + 
  theme_bw() +
  time.series.theme() +
  ggtitle("World Weekly Averaged COVID Cases") +
  ylab("Number of Cases") +
  scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 840000) +
  
  #Pre-lockdown
  geom_rect(data=total.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=total.weekly.df,aes(x=as.Date("2020-01-08"),y=8.35E5),label="Pre-Lockdown",color = "black", size = 1.5) +
  geom_vline(data=total.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=total.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=total.weekly.df,aes(x=as.Date("2020-04-01"),y=8.35E5),label="Lockdown 1", color = "black", size = 1.5) +
  geom_vline(data=total.weekly.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=total.weekly.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"),ymin=8.3E5,ymax=8.4E5),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=total.weekly.df,aes(x=as.Date("2020-09-15"),y=8.35E5),label="Re-Opening", color = "black", size = 1.5) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black"))

#Deaths
mean.newdeaths.plot <- ggplot(total.weekly.df , aes(x = Date, y = Mean_Weekly_Deaths, group = Period, color = Period)) +
  geom_line(size = 0.75) + 
  theme_bw() +
  time.series.theme() +
  ggtitle("World Weekly Averaged COVID Deaths") +
  ylab("Number of Deaths") +
  scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 1.5E4) +
  
  #Pre-lockdown
  geom_rect(data=total.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=total.weekly.df,aes(x=as.Date("2020-01-08"),y=1.475E4),label="Pre-Lockdown",color = "black", size = 1.5) +
  geom_vline(data=total.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=total.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-07-01",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=total.weekly.df,aes(x=as.Date("2020-04-01"),y=1.475E4),label="Lockdown 1", color = "black", size = 1.5) +
  geom_vline(data=total.weekly.df,xintercept=as.Date("2020-07-01",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=total.weekly.df,aes(xmin=as.Date("2020-07-01",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"),ymin=1.45E4,ymax=1.5E4),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=total.weekly.df,aes(x=as.Date("2020-09-15"),y=1.475E4),label="Re-Opening", color = "black", size = 1.5) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black")) 

NO2.vs.cases <- plot_grid(urban.plot, mean.newcases.plot, labels = c('A', 'B'), ncol = 1, align = "v") 
ggsave("NO2_vs_cases.pdf", NO2.vs.cases)
NO2.vs.cases 

NO2.vs.deaths <- plot_grid(urban.plot, mean.newdeaths.plot, labels = c('A', 'B'), ncol = 1, align = "v") 
ggsave("NO2_vs_deaths.pdf", NO2.vs.deaths)
NO2.vs.deaths 

US urban areas only interrupted time series analysis.

us.urban.sf <- us.urban.sf %>% filter(UATYP10=="U")

#2019 data. 
#Creates list of raster files from folder to call in function. 
us.rastlist.2019 <- list.files(path = "US_time_analysis_2019", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Creates list of dates to call in function. 
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018. 
 
#Function to make data frame for weekly average NO2 concentration. 
raster.extract.fxn <- function(x, y) {
  r <- raster(paste0("US_time_analysis_2019/", x)) %>%
    flip(direction='y') 
  r.vals <- exact_extract(r, us.urban.sf, weights = 'area', 'mean') 
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame("NO2_Concentration" = r.mean)   
  Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
  Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6)) 
  Period <- rep(2019, length.out = nrow(final.df)) 
  us.urban.2019.weekly <- cbind(final.df, Period, Date, Week) 
  us.urban.2019.weekly <- us.urban.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
  
x <- as.list(us.rastlist.2019)
y <- as.list(1:length(dates.2019))
us.urban.2019.weekly.df <- purrr::pmap_dfr(list(x, y), raster.extract.fxn)

#2020 data.  
#Creates list of raster files from folder to call in function. 
us.rastlist.2020 <- list.files(path = "US_time_analysis_2020", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Creates list of dates to call in function. 
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019. 

#Function to make data frame for weekly average NO2 concentration. 
raster.extract.fxn <- function(x, y) {
  r <- raster(paste0("US_time_analysis_2020/", x)) %>%
    flip(direction='y') 
  r.vals <- exact_extract(r, us.urban.sf, weights = 'area', 'mean') 
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame("NO2_Concentration" = r.mean)   
  Date <- as.factor(as.Date(dates.2020[[y]]))  
  Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6)) 
  Period <- rep(2020, length.out = nrow(final.df)) 
  us.urban.2020.weekly <- cbind(final.df, Period, Date, Week) 
  us.urban.2020.weekly <- us.urban.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}

x <- as.list(us.rastlist.2020)
y <- as.list(1:length(dates.2020))
us.urban.2020.weekly.df <- purrr::pmap_dfr(list(x, y), raster.extract.fxn) 
us.urban.weekly.df <- rbind(us.urban.2019.weekly.df, us.urban.2020.weekly.df) 
us.urban.weekly.df$Date <- as.Date(us.urban.weekly.df$Date)
us.urban.weekly.df$Period <- as.factor(us.urban.weekly.df$Period)
write.csv(us.urban.weekly.df, file = 'time_series_csv/us.urban.weekly.NO2.csv')
rm(us.urban.weekly.df)

US urban areas only: interrupted time series analysis plots.

us.urban.weekly.df <- read.csv(file = 'time_series_csv/us.urban.weekly.NO2.csv')
us.urban.weekly.df$Date <- as.Date(us.urban.weekly.df$Date)
us.urban.weekly.df$Period <- as.factor(us.urban.weekly.df$Period)

us.urban.plot <- ggplot(us.urban.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
  geom_line(size = 0.75) + 
  theme_bw() +
  time.series.theme() +
  labs(fill = "Year of Measured Pollution") +
  ggtitle("US Urban NO2 in 2019 vs 2020") +
  ylab("NO2 Concentration (mol/m^2)") +
  scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 4.6E-5) +
  
  #Pre-lockdown
  geom_rect(data=us.urban.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-03-19",format="%Y-%m-%d"),ymin=4.55E-5,ymax=4.6E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=us.urban.weekly.df,aes(x=as.Date("2020-02-07"),y=4.575E-5),label="Pre-Lockdown",color = "black", size = 2) +
  geom_vline(data=us.urban.weekly.df,xintercept = as.Date("2020-03-19",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=us.urban.weekly.df,aes(xmin=as.Date("2020-03-19",format="%Y-%m-%d"),xmax=as.Date("2020-06-05",format="%Y-%m-%d"),ymin=4.55E-5,ymax=4.6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=us.urban.weekly.df,aes(x=as.Date("2020-05-01"),y=4.575E-5),label="Lockdown 1", color = "black", size = 2) +
  geom_vline(data=us.urban.weekly.df,xintercept=as.Date("2020-06-05",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=us.urban.weekly.df,aes(xmin=as.Date("2020-06-05",format="%Y-%m-%d"),xmax=as.Date("2020-11-16",format="%Y-%m-%d"), ymin=4.55E-5,ymax=4.6E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=us.urban.weekly.df,aes(x=as.Date("2020-08-15"),y=4.575E-5),label="Re-Opening", color = "black", size = 2) +
  geom_vline(data=us.urban.weekly.df,xintercept=as.Date("2020-11-16",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 2
  geom_rect(data=us.urban.weekly.df,aes(xmin=as.Date("2020-11-16",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"), ymin=4.55E-5,ymax=4.6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=us.urban.weekly.df,aes(x=as.Date("2020-12-10"),y=4.575E-5),label="Lockdown 2", color = "black", size = 2) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black")) +
  ggsave("US_urban_time_series.pdf")

US rural areas only interrupted time series analysis.

us.urban.sp <- as(us.urban.sf, Class = "Spatial")
us.rural.sp <- gDifference(wrld_simpl[wrld_simpl$NAME == "United States", ], us.urban.sp)
us.rural.sf <- st_as_sf(us.rural.sp)

#2019 data. 
#Creates list of dates to call in function. 
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018. 
 
#Function to make data frame for weekly average NO2 concentration. 
raster.extract.fxn <- function(x, y) {
  r <- raster(paste0("US_time_analysis_2019/", x)) %>%
    flip(direction='y') 
  r.vals <- exact_extract(r, us.rural.sf, weights = 'area', 'mean') 
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame("NO2_Concentration" = r.mean)   
  Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
  Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6)) 
  Period <- rep(2019, length.out = nrow(final.df)) 
  us.rural.2019.weekly <- cbind(final.df, Period, Date, Week) 
  us.rural.2019.weekly <- us.rural.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}
  
x <- as.list(us.rastlist.2019)
y <- as.list(1:length(dates.2019))
us.rural.2019.weekly.df <- purrr::pmap_dfr(list(x, y), raster.extract.fxn)

#2020 data.  
#Creates list of dates to call in function. 
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019. 

#Function to make data frame for weekly average NO2 concentration. 
raster.extract.fxn <- function(x, y) {
  r <- raster(paste0("US_time_analysis_2020/", x)) %>%
    flip(direction='y') 
  r.vals <- exact_extract(r, us.rural.sf, weights = 'area', 'mean') 
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame("NO2_Concentration" = r.mean)   
  Date <- as.factor(as.Date(dates.2020[[y]]))  
  Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6)) 
  Period <- rep(2020, length.out = nrow(final.df)) 
  us.rural.2020.weekly <- cbind(final.df, Period, Date, Week) 
  us.rural.2020.weekly <- us.rural.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}

x <- as.list(us.rastlist.2020)
y <- as.list(1:length(dates.2020))
us.rural.2020.weekly.df <- purrr::pmap_dfr(list(x, y), raster.extract.fxn) 
us.rural.weekly.df <- rbind(us.rural.2019.weekly.df, us.rural.2020.weekly.df) 
us.rural.weekly.df$Date <- as.Date(us.rural.weekly.df$Date)
us.rural.weekly.df$Period <- as.factor(us.rural.weekly.df$Period)
write.csv(us.rural.weekly.df, file = 'time_series_csv/us.rural.weekly.NO2.csv')
rm(us.rural.weekly.df)

US rural areas only: interrupted time series analysis plots.

us.rural.weekly.df <- read.csv(file = 'time_series_csv/us.rural.weekly.NO2.csv')
us.rural.weekly.df$Date <- as.Date(us.rural.weekly.df$Date)
us.rural.weekly.df$Period <- as.factor(us.rural.weekly.df$Period)

us.rural.plot <- ggplot(us.rural.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
  geom_line(size = 0.75) + 
  theme_bw() +
  time.series.theme() +
  labs(fill = "Year of Measured Pollution") +
  ggtitle("US Rural NO2 in 2019 vs 2020") +
  ylab("NO2 Concentration (mol/m^2)") +
  scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 4.6E-5) +
  
  #Pre-lockdown
  geom_rect(data=us.rural.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-03-19",format="%Y-%m-%d"),ymin=4.55E-5,ymax=4.6E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=us.rural.weekly.df,aes(x=as.Date("2020-02-07"),y=4.575E-5),label="Pre-Lockdown",color = "black", size = 2) +
  geom_vline(data=us.rural.weekly.df,xintercept = as.Date("2020-03-19",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=us.rural.weekly.df,aes(xmin=as.Date("2020-03-19",format="%Y-%m-%d"),xmax=as.Date("2020-06-05",format="%Y-%m-%d"),ymin=4.55E-5,ymax=4.6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=us.rural.weekly.df,aes(x=as.Date("2020-05-01"),y=4.575E-5),label="Lockdown 1", color = "black", size = 2) +
  geom_vline(data=us.rural.weekly.df,xintercept=as.Date("2020-06-05",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=us.rural.weekly.df,aes(xmin=as.Date("2020-06-05",format="%Y-%m-%d"),xmax=as.Date("2020-11-16",format="%Y-%m-%d"), ymin=4.55E-5,ymax=4.6E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=us.rural.weekly.df,aes(x=as.Date("2020-08-15"),y=4.575E-5),label="Re-Opening", color = "black", size = 2) +
  geom_vline(data=us.rural.weekly.df,xintercept=as.Date("2020-11-16",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 2
  geom_rect(data=us.rural.weekly.df,aes(xmin=as.Date("2020-11-16",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"), ymin=4.55E-5,ymax=4.6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=us.rural.weekly.df,aes(x=as.Date("2020-12-10"),y=4.575E-5),label="Lockdown 2", color = "black", size = 2) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black")) +
  ggsave("US_rural_time_series.pdf")

us.rural.vs.urban <- plot_grid(us.urban.plot, us.rural.plot, labels = c('A', 'B'), ncol = 1) 
ggsave("US_rural_vs_urban.pdf", us.rural.vs.urban)
us.rural.vs.urban 

US time analysis: interrupted series time analysis data preparation.

#Use function in other NO2_Exploratory Analysis to filter out each week from Jan-Dec 2019 and Jan-Dec 2020 for comparison.

#Used code:
# gsutil -m cp \
# >   "file names" \
# >   COVID-19_NO2/US_time_analysis_2020

# in terminal to copy all files from google cloud storage into folders in working directory. 

#2019 data.
us.rastlist.2019 <- list.files(path = "US_time_analysis_2019", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Creates list of dates to call in function. 
dates.2019 <- seq(as.Date("2018-12-31"), as.Date("2019-12-29"), by=7) #List of dates starting Dec 31 2018. 

#Function to make data frame for weekly average NO2 concentration. 
raster.extract.fxn <- function(x, y) {
  r <- raster(paste0("US_time_analysis_2019/", x)) %>% flip(direction='y')  
  r.vals <- exact_extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ], weights = 'area', 'mean') 
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="United States", ]@data, r.mean)  
  Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
  Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6)) 
  Period <- rep(2019, length.out = nrow(final.df)) 
  US.2019.weekly <- cbind(final.df, Date, Week, Period) 
  rm(r.mean, r.vals)
  names(US.2019.weekly)[12] <- "NO2_Concentration"
  US.2019.weekly <- US.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}

x <- us.rastlist.2019
y <- 1:length(dates.2019)
US.2019.weekly.df <- purrr::map2_dfr(x, y, raster.extract.fxn) 

#2020 data.  
us.rastlist.2020 <- list.files(path = "US_time_analysis_2020", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Creates list of dates to call in function. 
dates.2020 <- seq(as.Date("2019-12-30"), as.Date("2021-01-03"), by=7) #List of dates starting Dec 30th 2019. 

#Function to make data frame for weekly average NO2 concentration. 
raster.extract.fxn <- function(x, y) {
  r <- raster(paste0("US_time_analysis_2020/", x)) %>% flip(direction='y')  
  r.vals <- exact_extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ], weights = 'area', 'mean') 
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="United States", ]@data, r.mean)   
  Date <- as.factor(as.Date(dates.2020[[y]]))
  Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6)) 
  Period <- rep(2020, length.out = nrow(final.df)) 
  US.2020.weekly <- cbind(final.df, Date, Week, Period)     # Add new column to data
  rm(r.mean, r.vals)
  names(US.2020.weekly)[12] <- "NO2_Concentration"
  US.2020.weekly <- US.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}

x <- us.rastlist.2020
y <- 1:length(dates.2020)
US.2020.weekly.df <- purrr::map2_dfr(x, y, raster.extract.fxn) 
US.weekly.df <- rbind(US.2019.weekly.df, US.2020.weekly.df) 
US.weekly.df$Date <- as.Date(US.weekly.df$Date)
US.weekly.df$Period <- as.factor(US.weekly.df$Period)
write.csv(US.weekly.df, file = 'time_series_csv/us.weekly.NO2.csv')
rm(US.weekly.df)

US time analysis: interrupted series time analysis plots.

us.weekly.df <- read.csv(file = 'time_series_csv/us.weekly.NO2.csv')
us.weekly.df$Date <- as.Date(us.weekly.df$Date)
us.weekly.df$Period <- as.factor(us.weekly.df$Period)

ggplot(us.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
  geom_line(size = 0.75) + 
  theme_bw() +
  time.series.theme() +
  labs(fill = "Year of Measured Pollution") +
  ggtitle("US NO2 in 2019 vs 2020") +
  ylab("NO2 Concentration (mol/m^2)") +
  scale_x_date(date_labels = "%b",date_breaks = "1 month",expand=c(0,0),limits=c(as.Date("2019-12-25",format="%Y-%m-%d"),as.Date("2020-12-31",format="%Y-%m-%d"))) +
ylim(0, 3E-5) +
  
  #Pre-lockdown
  geom_rect(data=us.weekly.df,aes(xmin=as.Date("2019-12-25",format="%Y-%m-%d"),xmax=as.Date("2020-03-19",format="%Y-%m-%d"),ymin=2.95E-5,ymax=3E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=us.weekly.df,aes(x=as.Date("2020-02-07"),y=2.975E-5),label="Pre-Lockdown",color = "black", size = 2) +
  geom_vline(data=us.weekly.df,xintercept = as.Date("2020-03-19",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=us.weekly.df,aes(xmin=as.Date("2020-03-19",format="%Y-%m-%d"),xmax=as.Date("2020-06-05",format="%Y-%m-%d"),ymin=2.95E-5,ymax=3E-5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=us.weekly.df,aes(x=as.Date("2020-05-01"),y=2.975E-5),label="Lockdown 1", color = "black", size = 2) +
  geom_vline(data=us.weekly.df,xintercept=as.Date("2020-06-05",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=us.weekly.df,aes(xmin=as.Date("2020-06-05",format="%Y-%m-%d"),xmax=as.Date("2020-11-16",format="%Y-%m-%d"), ymin=2.95E-5,ymax=3E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=us.weekly.df,aes(x=as.Date("2020-08-15"),y=2.975E-5),label="Re-Opening", color = "black", size = 2) +
  geom_vline(data=us.weekly.df,xintercept=as.Date("2020-11-16",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 2
  geom_rect(data=us.weekly.df,aes(xmin=as.Date("2020-11-16",format="%Y-%m-%d"),xmax=as.Date("2020-12-31",format="%Y-%m-%d"), ymin=2.95E-5,ymax=3E-5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=us.weekly.df,aes(x=as.Date("2020-12-10"),y=2.975E-5),label="Lockdown 2", color = "black", size = 2) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black")) +
  ggsave("US_time_series.pdf")

China time analysis: interrupted series time analysis data preparation.

#Use function in other NO2_Exploratory Analysis file to filter out each week from Jan-Dec 2019 and Jan-Dec 2020 for comparison.

#Used code:
# gsutil -m cp \
# >   "file names" \
# >   COVID-19_NO2/China_time_analysis_2020

# in terminal to copy all files from google cloud storage into folders in working directory. 

#2019 data. 
#Creates list of raster files from folder to call in function. 
china.rastlist.2019 <- list.files(path = "China_time_analysis_2019", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Creates list of dates to call in function. 
dates.2019 <- seq(as.Date("2018-11-05"), as.Date("2019-11-10"), by=7) #List of dates starting Dec 31 2018. 

#Function to make data frame for weekly average NO2 concentration. 
raster.extract.fxn <- function(x, y) {
  r <- raster(paste0("China_time_analysis_2019/", x)) %>% flip(direction='y')
  r.vals <- exact_extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ], weights = 'area', 'mean')
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="China", ]@data, r.mean)  
  Date <- as.factor(as.Date(dates.2019[[y]] %m+% years(1))) #Adds 1 year to date to match x axis of 2020 dates.
  Week <- paste0(as.character(as.Date(dates.2019[[y]])), " to ", as.character(as.Date(dates.2019[[y]]) + 6)) 
  Period <- rep("2018-2019", length.out = nrow(final.df)) 
  China.2019.weekly <- cbind(final.df, Date, Week, Period) 
  rm(r.mean, r.vals)
  names(China.2019.weekly)[12] <- "NO2_Concentration"
  China.2019.weekly <- China.2019.weekly %>% select(Period, Date, Week, NO2_Concentration)
}

x <- china.rastlist.2019
y <- 1:length(dates.2019)
China.2019.weekly.df <- purrr::map2_dfr(x, y, raster.extract.fxn) 

#2020 data.  
#Creates list of raster files from folder to call in function. 
china.rastlist.2020 <- list.files(path = "China_time_analysis_2020", pattern='.tif$', 
all.files=TRUE, full.names=FALSE)

#Creates list of dates to call in function. 
dates.2020 <- seq(as.Date("2019-11-04"), as.Date("2020-11-08"), by=7) #List of dates starting Dec 30th 2019. 

#Function to make data frame for weekly average NO2 concentration. 
raster.extract.fxn <- function(x, y) {
  r <- raster(paste0("China_time_analysis_2020/", x)) %>% flip(direction='y')  
  r.vals <- exact_extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ], weights = 'area', 'mean')
  r.mean <- mean(r.vals, na.rm = TRUE)
  final.df <- data.frame(wrld_simpl[wrld_simpl@data$NAME=="China", ]@data, r.mean)   
  Date <- as.factor(as.Date(dates.2020[[y]]))
  Week <- paste0(as.character(as.Date(dates.2020[[y]])), " to ", as.character(as.Date(dates.2020[[y]]) + 6)) 
  Period <- rep("2019-2020", length.out = nrow(final.df)) 
  China.2020.weekly <- cbind(final.df, Date, Week, Period)     # Add new column to data
  rm(r.mean, r.vals)
  names(China.2020.weekly)[12] <- "NO2_Concentration"
  China.2020.weekly <- China.2020.weekly %>% select(Period, Date, Week, NO2_Concentration)
}

x <- china.rastlist.2020
y <- 1:length(dates.2020)
China.2020.weekly.df <- purrr::map2_dfr(x, y, raster.extract.fxn) 
China.weekly.df <- rbind(China.2019.weekly.df, China.2020.weekly.df) 
China.weekly.df$Date <- as.Date(China.weekly.df$Date)
China.weekly.df$Period <- as.factor(China.weekly.df$Period)
max(China.weekly.df$NO2_Concentration)
write.csv(China.weekly.df, file = 'time_series_csv/china.weekly.NO2.csv')
rm(China.weekly.df)

China time analysis: interrupted series time analysis plots.

china.weekly.df <- read.csv(file = 'time_series_csv/china.weekly.NO2.csv')
china.weekly.df$Date <- as.Date(china.weekly.df$Date)
china.weekly.df$Period <- as.factor(china.weekly.df$Period)

ggplot(china.weekly.df, aes(x = Date, y = NO2_Concentration, group = Period, color = Period)) +
  geom_line(size = 0.75) + 
  labs(fill = "Year of Measured Pollution") +
  ggtitle("China NO2 in 2019 vs 2020") +
  ylab("NO2 Concentration (mol/m^2)") +
  scale_x_date(date_labels = "%b-%Y",date_breaks = "1 month",limits=c(as.Date("2019-11-01",format="%Y-%m-%d"),as.Date("2020-11-10",format="%Y-%m-%d"))) +
ylim(0,6E-5) +
  theme_bw() +
  time.series.theme() +

  #Pre-lockdown
  geom_rect(data=china.weekly.df,aes(xmin=as.Date("2019-11-01",format="%Y-%m-%d"),xmax=as.Date("2020-01-23",format="%Y-%m-%d"),ymin=5.9E-5,ymax=6E-5),fill = "#c0c0c0",color = NA, alpha=0.02) +
  geom_text(data=china.weekly.df,aes(x=as.Date("2019-12-15"),y=5.95E-5),label="Pre-Lockdown",color = "black", size = 2) +
  geom_vline(data=china.weekly.df,xintercept = as.Date("2020-01-23",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Lockdown 1
  geom_rect(data=china.weekly.df,aes(xmin=as.Date("2020-01-23",format="%Y-%m-%d"),xmax=as.Date("2020-04-08",format="%Y-%m-%d"),ymin=5.9E-5,ymax=6E-5),fill = "#fa9796",color = NA,alpha=0.02) +
  geom_text(data=china.weekly.df,aes(x=as.Date("2020-03-01"),y=5.95E-5),label="Lockdown 1", color = "black", size = 2) +
  geom_vline(data=china.weekly.df,xintercept=as.Date("2020-04-08",format="%Y-%m-%d"),linetype="dashed",colour="black",size=0.75,alpha=0.8) + 
  #Re-opening Green PHASE
  geom_rect(data=china.weekly.df,aes(xmin=as.Date("2020-04-08",format="%Y-%m-%d"),xmax=as.Date("2020-11-08",format="%Y-%m-%d"), ymin=5.9E-5,ymax=6E-5),fill = "#b5dea5",color = NA,alpha=0.02) +
  geom_text(data=china.weekly.df,aes(x=as.Date("2020-08-01"),y=5.95E-5),label="Re-Opening", color = "black", size = 2) +
  scale_alpha_manual(values = c(0.4, 1.1)) + 
  scale_color_manual(values = c("gray", "blue", "black")) +
  ggsave("China_time_series.pdf")

US time analysis plots.

#Converting before lockdown and after lockdown rasters for US to data frames.
US.NO2.change.fxn <- function(file) {
  r <- raster(paste0('./US_NO2_comparison/', file)) %>%
  flip(direction='y') 
  df <- r %>% crop(., extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ])) %>% 
  mask(., wrld_simpl[wrld_simpl@data$NAME=="United States", ]) %>%
  rasterToPoints(spatial = TRUE) %>%
  data.frame() %>%
  select(NO2_Concentration = 1, Longitude = 2, Latitude = 3) 
  df
}
files <- c("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif", "US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif", "US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")

US.NO2.list <- purrr::map(files, US.NO2.change.fxn)

#Plotting before and after maps. 
US.NO2.change.map.fxn <- function(x, y, t1, t2) {
p1 <- ggplot() +
 geom_raster(data = x, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) + 
 geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
  my_theme() + 
  ggtitle(paste("US NO2 Concentrations (Sentinel Satellite)", t1)) +
  scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100), limits=c(0, 10E-5)) +
    north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
    scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
  xlim(-130,-65) +
  ylim(23,49) +
  coord_quickmap()
p2 <- ggplot() +
 geom_raster(data = y, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) + 
 geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="United States", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
  my_theme() + 
  ggtitle(paste("US NO2 Concentrations (Sentinel Satellite)", t2)) +
  scale_fill_gradientn(name = "NO2 Concentration \n(ppb)", colours = myPalette(100), limits=c(0, 10E-5)) +
    north(x.min = -130, y.min = 23, x.max = -65, y.max = 49, anchor = c(x=-119, y=32), symbol=12) +
    scalebar(x.min = -130, y.min = 23, x.max = -65, y.max = 49, dist = 200, dist_unit = 'km', transform = TRUE, anchor = c(x=-120, y=27.5), st.size = 1.5, model = 'WGS84') +
  xlim(-130,-65) +
  ylim(23,49) +
  coord_quickmap()
plot_grid(p1, p2, labels = "AUTO", ncol = 1)
}

US.NO2.change.map.fxn(data.frame(US.NO2.list[1]), data.frame(US.NO2.list[2]), "Jan-March 2019", "March-May 2019")

US.NO2.change.map.fxn(data.frame(US.NO2.list[3]), data.frame(US.NO2.list[4]), "Jan-March 2020", "March-May 2020")

US.NO2.change.map.fxn(data.frame(US.NO2.list[5]), data.frame(US.NO2.list[6]), "Jan-March 2021", "March-May 2021")

US time analysis (violin plots).

#Function for finding medians over each time period. 
US_NO2_median <- function(x) {
  r <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
  r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  lapply(r.vals, FUN=median, na.rm = TRUE)
}
US_NO2_median("US_NO2_2020-01-19_start.tif")
## [[1]]
## [1] 1.965982e-05
US_NO2_median("US_NO2_2020-03-19_start.tif")
## [[1]]
## [1] 1.799675e-05
#Function for finding averages over each time period. 
US_NO2_average <- function(x) {
  r <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
  r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  lapply(r.vals, FUN=mean, na.rm = TRUE)
}
US_NO2_average("US_NO2_2020-01-19_start.tif")
## [[1]]
## [1] 2.182116e-05
US_NO2_average("US_NO2_2020-03-19_start.tif")
## [[1]]
## [1] 1.934275e-05
#2019 violin plot
US_2019_violin <- function(x, y) {
  r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r1 <- r1 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2019", length.out = nrow(r1)) %>% as.data.frame()
  r1 <- cbind(vec, r1)
  names(r1)[1] <- "Timeframe"
  names(r1)[4] <- "NO2_Concentration"
  r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r2 <- r2 %>% rasterToPoints() %>% data.frame()
  vec <- rep("March-May 2019", length.out = nrow(r2)) %>% as.data.frame()
  r2 <- cbind(vec, r2)
  names(r2)[1] <- "Timeframe"
  names(r2)[4] <- "NO2_Concentration"
  df <- rbind(r1, r2)
  ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) + 
    geom_violin(fill="gray") +
    ggtitle('US NO2: 2019 Seasonal Change') +
    ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
    stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2019_violin("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif")

#2020 violin plot
US_2020_violin <- function(x, y) {
  r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r1 <- r1 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2020", length.out = nrow(r1)) %>% as.data.frame()
  r1 <- cbind(vec, r1)
  names(r1)[1] <- "Timeframe"
  names(r1)[4] <- "NO2_Concentration"
  r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r2 <- r2 %>% rasterToPoints() %>% data.frame()
  vec <- rep("March-May 2020", length.out = nrow(r2)) %>% as.data.frame()
  r2 <- cbind(vec, r2)
  names(r2)[1] <- "Timeframe"
  names(r2)[4] <- "NO2_Concentration"
  df <- rbind(r1, r2)
  ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) + 
    geom_violin(fill="gray") +
    ggtitle('US NO2 Before and After COVID-19') +
    ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
    stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2020_violin("US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif")

#2021 violin plot
US_2021_violin <- function(x, y) {
  r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r1 <- r1 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2021", length.out = nrow(r1)) %>% as.data.frame()
  r1 <- cbind(vec, r1)
  names(r1)[1] <- "Timeframe"
  names(r1)[4] <- "NO2_Concentration"
  r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r2 <- r2 %>% rasterToPoints() %>% data.frame()
  vec <- rep("March-May 2021", length.out = nrow(r2)) %>% as.data.frame()
  r2 <- cbind(vec, r2)
  names(r2)[1] <- "Timeframe"
  names(r2)[4] <- "NO2_Concentration"
  df <- rbind(r1, r2)
  ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) + 
    geom_violin(fill="gray") +
    ggtitle('US NO2: 2021 Seasonal Change') +
    ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
    stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_2021_violin("US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")

#All violin plots in 1 graph:
US_violin <- function(a, b, c, d, e, f) {
  r1 <- raster(paste0("./US_NO2_Comparison/", (a))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r1 <- r1 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2019", length.out = nrow(r1)) %>% as.data.frame()
  r1 <- cbind(vec, r1)
  names(r1)[1] <- "Timeframe"
  names(r1)[4] <- "NO2_Concentration"
  r2 <- raster(paste0("./US_NO2_Comparison/", (b))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r2 <- r2 %>% rasterToPoints() %>% data.frame()
  vec <- rep("March-May 2019", length.out = nrow(r2)) %>% as.data.frame()
  r2 <- cbind(vec, r2)
  names(r2)[1] <- "Timeframe"
  names(r2)[4] <- "NO2_Concentration"
  r3 <- raster(paste0("./US_NO2_Comparison/", (c))) %>% flip(direction='y')
  r3 <- crop(r3, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r3, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r3 <- r3 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2020", length.out = nrow(r3)) %>% as.data.frame()
  r3 <- cbind(vec, r3)
  names(r3)[1] <- "Timeframe"
  names(r3)[4] <- "NO2_Concentration"
  r4 <- raster(paste0("./US_NO2_Comparison/", (d))) %>% flip(direction='y')
  r4 <- crop(r4, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r4, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r4 <- r4 %>% rasterToPoints() %>% data.frame()
  vec <- rep("March-May 2020", length.out = nrow(r4)) %>% as.data.frame()
  r4 <- cbind(vec, r4)
  names(r4)[1] <- "Timeframe"
  names(r4)[4] <- "NO2_Concentration"
  r5 <- raster(paste0("./US_NO2_Comparison/", (e))) %>% flip(direction='y')
  r5 <- crop(r5, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r5, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r5 <- r5 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2021", length.out = nrow(r5)) %>% as.data.frame()
  r5 <- cbind(vec, r5)
  names(r5)[1] <- "Timeframe"
  names(r5)[4] <- "NO2_Concentration"
  r6 <- raster(paste0("./US_NO2_Comparison/", (f))) %>% flip(direction='y')
  r6 <- crop(r6, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r6, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r6 <- r6 %>% rasterToPoints() %>% data.frame()
  vec <- rep("March-May 2021", length.out = nrow(r6)) %>% as.data.frame()
  r6 <- cbind(vec, r6)
  names(r6)[1] <- "Timeframe"
  names(r6)[4] <- "NO2_Concentration"
  df <- rbind(r1, r2, r3, r4, r5, r6)
  ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) + 
    geom_violin(fill="gray") +
    ggtitle('US NO2: 2019 Seasonal Change') +
    ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
    theme(axis.text.x = element_text(angle = 45)) +
    scale_x_discrete(limits=c("Jan-March 2019", "March-May 2019", "Jan-March 2020", "March-May 2020", "Jan-March 2021", "March-May 2021")) +
    stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
US_violin("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif", "US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif", "US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")

US time analysis (box plots).

US_2019_boxplot <- function(x, y) {
  r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  s <- stack(r1, r2)
  boxplot(s, col=c('red', 'blue'), 
          main='US NO2: 2019 Seasonal Change', 
          ylab='Date',
          names = c('2019 Jan-Mar', '2019 Mar-May'))
}
US_2019_boxplot("US_NO2_2019-01-19_start.tif", "US_NO2_2019-03-19_start.tif")

US_2020_boxplot <- function(x, y) {
  r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  s <- stack(r1, r2)
  boxplot(s, col=c('red', 'blue'), 
          main='US NO2 Before and After COVID-19', 
          ylab='Date',
          names = c('2020 Jan-Mar', '2020 Mar-May'))
}
US_2020_boxplot("US_NO2_2020-01-19_start.tif", "US_NO2_2020-03-19_start.tif")

US_2021_boxplot <- function(x, y) {
  r1 <- raster(paste0("./US_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  r2 <- raster(paste0("./US_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="United States", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="United States", ])
  s <- stack(r1, r2)
  boxplot(s, col=c('red', 'blue'), 
          main='US NO2: 2021 Seasonal Change', 
          ylab='Date',
          names = c('2021 Jan-Mar', '2021 Mar-May'))
}
US_2021_boxplot("US_NO2_2021-01-19_start.tif", "US_NO2_2021-03-19_start.tif")

China time analysis plots.

#Converting before lockdown and after lockdown rasters for China to data frames.
China.NO2.change.fxn <- function(file) {
  r <- raster(paste0('./China_NO2_comparison/', file)) %>%
  flip(direction='y') 
  df <- r %>% crop(., extent(wrld_simpl[wrld_simpl@data$NAME=="China", ])) %>% 
  mask(., wrld_simpl[wrld_simpl@data$NAME=="China", ]) %>%
  rasterToPoints(spatial = TRUE) %>%
  data.frame() %>%
  select(NO2_Concentration = 1, Longitude = 2, Latitude = 3) 
  df
}

files <- c("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif", "China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif", "China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")

China.NO2.list <- purrr::map(files, China.NO2.change.fxn)

#Plotting before and after maps. 
China.NO2.change.map.fxn <- function(x, y, t1, t2) {
p1 <- ggplot() +
 geom_raster(data = x, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) + 
 geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="China", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
  my_theme() + 
  ggtitle(paste("China NO2 Concentrations (Sentinel Satellite)", t1)) +
  scale_fill_gradientn(name = "NO2 Concentration \n(mol/m^2)", colours = myPalette(100), limits=c(0, 5.5E-4)) +
    north(x.min = 73, y.min = 18, x.max = 135, y.max = 54, anchor = c(x=84, y=28), symbol=12) +
    scalebar(x.min = 73, y.min = 18, x.max = 135, y.max = 54, dist = 250, dist_unit = 'km', transform = TRUE, anchor = c(x=84, y=22.5), st.size = 1.5, height = 0.02, model = 'WGS84') +
  xlim(73, 135) +
  ylim(18, 54) +
  coord_quickmap()
p2 <- ggplot() +
 geom_raster(data = y, aes(x = Longitude, y = Latitude, fill = NO2_Concentration)) + 
 geom_polygon(data = wrld_simpl[wrld_simpl@data$NAME=="China", ], aes(x = long, y = lat, group = group), colour = "black", size = 0.5, fill = NA) +
  my_theme() + 
  ggtitle(paste("China NO2 Concentrations (Sentinel Satellite)", t2)) +
  scale_fill_gradientn(name = "NO2 Concentration \n(ppb)", colours = myPalette(100), limits=c(0, 5.5E-4)) +
    north(x.min = 73, y.min = 18, x.max = 135, y.max = 54, anchor = c(x=84, y=28), symbol=12) +
    scalebar(x.min = 73, y.min = 18, x.max = 135, y.max = 54, dist = 250, dist_unit = 'km', transform = TRUE, anchor = c(x=84, y=22.5), st.size = 1.5, height = 0.02, model = 'WGS84') +
  xlim(73, 135) +
  ylim(18, 54) +
  coord_quickmap()
plot_grid(p1, p2, labels = "AUTO", ncol = 1)
}

China.NO2.change.map.fxn(data.frame(China.NO2.list[1]), data.frame(China.NO2.list[2]), "Nov-Jan 2019", "Jan-March 2019")

China.NO2.change.map.fxn(data.frame(China.NO2.list[3]), data.frame(China.NO2.list[4]), "Nov-Jan 2020", "Jan-March 2020")

China.NO2.change.map.fxn(data.frame(China.NO2.list[5]), data.frame(China.NO2.list[6]), "Nov-Jan 2021", "Jan-March 2021")

China time analysis (violin).

#Function for finding medians over each time period. 
China_NO2_median <- function(x) {
  r <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
  r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  lapply(r.vals, FUN=median, na.rm = TRUE)
}
China_NO2_median("China_NO2_2019-11-23_start.tif")
## [[1]]
## [1] 1.576366e-05
China_NO2_median("China_NO2_2020-01-23_start.tif")
## [[1]]
## [1] 1.231765e-05
#Function for finding averages over each time period. 
China_NO2_average <- function(x) {
  r <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
  r.vals <- raster::extract(r, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  lapply(r.vals, FUN=mean, na.rm = TRUE)
}
China_NO2_average("China_NO2_2019-11-23_start.tif")
## [[1]]
## [1] 4.035193e-05
China_NO2_average("China_NO2_2020-01-23_start.tif") 
## [[1]]
## [1] 2.19273e-05
#2019 violin plot
China_2019_violin <- function(x, y) {
  r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r1 <- r1 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Nov2018-Jan2019", length.out = nrow(r1)) %>% as.data.frame()
  r1 <- cbind(vec, r1)
  names(r1)[1] <- "Timeframe"
  names(r1)[4] <- "NO2_Concentration"
  r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r2 <- r2 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2019", length.out = nrow(r2)) %>% as.data.frame()
  r2 <- cbind(vec, r2)
  names(r2)[1] <- "Timeframe"
  names(r2)[4] <- "NO2_Concentration"
  df <- rbind(r1, r2)
  ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) + 
    geom_violin(fill="gray") +
    ggtitle('China NO2: 2019 Seasonal Change') +
    ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
    scale_x_discrete(limits=c("Nov2018-Jan2019", "Jan-March 2019")) +
    stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2019_violin("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif")

#2020 violin plot
China_2020_violin <- function(x, y) {
  r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r1 <- r1 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Nov2019-Jan2020", length.out = nrow(r1)) %>% as.data.frame()
  r1 <- cbind(vec, r1)
  names(r1)[1] <- "Timeframe"
  names(r1)[4] <- "NO2_Concentration"
  r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r2 <- r2 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2020", length.out = nrow(r2)) %>% as.data.frame()
  r2 <- cbind(vec, r2)
  names(r2)[1] <- "Timeframe"
  names(r2)[4] <- "NO2_Concentration"
  df <- rbind(r1, r2)
  ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) + 
    geom_violin(fill="gray") +
    ggtitle('China NO2 Before and After COVID-19') +
    ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
    scale_x_discrete(limits=c("Nov2019-Jan2020", "Jan-March 2020")) +
    stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2020_violin("China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif")

#2021 violin plot
China_2021_violin <- function(x, y) {
  r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r1 <- r1 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Nov2020-Jan2021", length.out = nrow(r1)) %>% as.data.frame()
  r1 <- cbind(vec, r1)
  names(r1)[1] <- "Timeframe"
  names(r1)[4] <- "NO2_Concentration"
  r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r2 <- r2 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2021", length.out = nrow(r2)) %>% as.data.frame()
  r2 <- cbind(vec, r2)
  names(r2)[1] <- "Timeframe"
  names(r2)[4] <- "NO2_Concentration"
  df <- rbind(r1, r2)
  ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) + 
    geom_violin(fill="gray") +
    ggtitle('China NO2: 2021 Seasonal Change') +
    ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
    scale_x_discrete(limits=c("Nov2020-Jan2021", "Jan-March 2021")) +
    stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_2021_violin("China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")

#All violin plots in 1 graph: 
China_violin <- function(a, b, c, d, e, f) {
  r1 <- raster(paste0("./China_NO2_Comparison/", (a))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r1 <- r1 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Nov2018-Jan2019", length.out = nrow(r1)) %>% as.data.frame()
  r1 <- cbind(vec, r1)
  names(r1)[1] <- "Timeframe"
  names(r1)[4] <- "NO2_Concentration"
  r2 <- raster(paste0("./China_NO2_Comparison/", (b))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r2 <- r2 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2019", length.out = nrow(r2)) %>% as.data.frame()
  r2 <- cbind(vec, r2)
  names(r2)[1] <- "Timeframe"
  names(r2)[4] <- "NO2_Concentration"
  r3 <- raster(paste0("./China_NO2_Comparison/", (c))) %>% flip(direction='y')
  r3 <- crop(r3, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r3, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r3 <- r3 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Nov2019-Jan2020", length.out = nrow(r3)) %>% as.data.frame()
  r3 <- cbind(vec, r3)
  names(r3)[1] <- "Timeframe"
  names(r3)[4] <- "NO2_Concentration"
  r4 <- raster(paste0("./China_NO2_Comparison/", (d))) %>% flip(direction='y')
  r4 <- crop(r4, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r4, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r4 <- r4 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2020", length.out = nrow(r4)) %>% as.data.frame()
  r4 <- cbind(vec, r4)
  names(r4)[1] <- "Timeframe"
  names(r4)[4] <- "NO2_Concentration"
  r5 <- raster(paste0("./China_NO2_Comparison/", (e))) %>% flip(direction='y')
  r5 <- crop(r5, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r5, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r5 <- r5 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Nov2020-Jan2021", length.out = nrow(r5)) %>% as.data.frame()
  r5 <- cbind(vec, r5)
  names(r5)[1] <- "Timeframe"
  names(r5)[4] <- "NO2_Concentration"
  r6 <- raster(paste0("./China_NO2_Comparison/", (f))) %>% flip(direction='y')
  r6 <- crop(r6, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r6, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r6 <- r6 %>% rasterToPoints() %>% data.frame()
  vec <- rep("Jan-March 2021", length.out = nrow(r6)) %>% as.data.frame()
  r6 <- cbind(vec, r6)
  names(r6)[1] <- "Timeframe"
  names(r6)[4] <- "NO2_Concentration"
  df <- rbind(r1, r2, r3, r4, r5, r6)
  ggplot(data = df, aes(x=Timeframe, y=NO2_Concentration, fill=Timeframe)) + 
    geom_violin(fill="gray") +
    ggtitle('China NO2: 2019 Seasonal Change') +
    ylab('NO2 Concentration (mol/m^2)') + xlab('Timeframe') +
    theme(axis.text.x = element_text(angle = 45)) +
    scale_x_discrete(limits=c("Nov2018-Jan2019", "Jan-March 2019", "Nov2019-Jan2020", "Jan-March 2020", "Nov2020-Jan2021", "Jan-March 2021")) +
    stat_summary(fun.data=mean_sdl, fun.args = list(mult = 2), geom="pointrange", color="red")
}
China_violin("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif", "China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif", "China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")

China time analysis (boxplots).

China_2019_change <- function(x, y) {
  r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  s <- stack(r1, r2)
  boxplot(s, col=c('red', 'blue'), 
          main='China NO2: 2019 Seasonal Change', 
          ylab='Date',
          names = c('Nov2018-Jan2019', 'Jan-March 2019'))
}
China_2019_change("China_NO2_2018-11-23_start.tif", "China_NO2_2019-01-23_start.tif")

China_2020_change <- function(x, y) {
  r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  s <- stack(r1, r2)
  boxplot(s, col=c('red', 'blue'), 
          main='China NO2 Before and After COVID-19', 
          ylab='Date',
          names = c('Nov2019-Jan2020', 'Jan-March 2020'))
}
China_2020_change("China_NO2_2019-11-23_start.tif", "China_NO2_2020-01-23_start.tif")

China_2021_change <- function(x, y) {
  r1 <- raster(paste0("./China_NO2_Comparison/", (x))) %>% flip(direction='y')
  r1 <- crop(r1, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r1, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  r2 <- raster(paste0("./China_NO2_Comparison/", (y))) %>% flip(direction='y')
  r2 <- crop(r2, extent(wrld_simpl[wrld_simpl@data$NAME=="China", ]))
  mask(r2, wrld_simpl[wrld_simpl@data$NAME=="China", ])
  s <- stack(r1, r2)
  boxplot(s, col=c('red', 'blue'), 
          main='China NO2: 2021 Seasonal Change', 
          ylab='Date',
          names = c('Nov2020-Jan2021', 'jan-March 2021'))
}
China_2021_change("China_NO2_2020-11-23_start.tif", "China_NO2_2021-01-23_start.tif")